Skip to content

Commit fc38aab

Browse files
authored
Merge pull request #627 from stackhpc/feat/root-squash-compute-init
Compute-init: cope with root-squashed nfs clients
2 parents 120bcfc + 74deca8 commit fc38aab

File tree

10 files changed

+71
-87
lines changed

10 files changed

+71
-87
lines changed

ansible/roles/basic_users/tasks/main.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
when:
6767
- item.state | default('present') == 'present'
6868
- item.create_home | default(true) | bool
69-
- inventory_hostname == basic_users_homedir_server
69+
- ansible_hostname == basic_users_homedir_server
7070

7171
# The following tasks run on a single *client* node, so that home directory
7272
# paths are easily constructed, becoming each user so that root-squash
@@ -85,7 +85,7 @@
8585
when:
8686
- item.state | default('present') == 'present'
8787
- item.generate_ssh_key | default(true) | bool or item.public_key is defined
88-
- inventory_hostname == basic_users_homedir_client
88+
- ansible_hostname == basic_users_homedir_client
8989

9090
- name: Generate cluster ssh key
9191
community.crypto.openssh_keypair:
@@ -101,7 +101,7 @@
101101
when:
102102
- item.state | default('present') == 'present'
103103
- item.generate_ssh_key | default(true)
104-
- inventory_hostname == basic_users_homedir_client
104+
- ansible_hostname == basic_users_homedir_client
105105
register: _cluster_ssh_keypair
106106

107107
- name: Write generated cluster ssh key to authorized_keys
@@ -118,7 +118,7 @@
118118
when:
119119
- item.item.state | default('present') == 'present'
120120
- item.item.generate_ssh_key | default(true)
121-
- inventory_hostname == basic_users_homedir_client
121+
- ansible_hostname == basic_users_homedir_client
122122
- item.public_key is defined # NB this is the *returned* public key
123123

124124
- name: Write supplied public key to authorized_keys
@@ -134,5 +134,5 @@
134134
label: "{{ item.name }}"
135135
when:
136136
- item.state | default('present') == 'present'
137-
- inventory_hostname == basic_users_homedir_client
137+
- ansible_hostname == basic_users_homedir_client
138138
- item.public_key is defined # NB this is the *provided* public key

ansible/roles/cacerts/tasks/export.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
copy:
33
src: "{{ item }}"
44
dest: /exports/cluster/cacerts/
5-
owner: root
5+
owner: slurm
66
group: root
77
mode: 0644
88
with_fileglob:

ansible/roles/compute_init/files/compute-init.yml

Lines changed: 36 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,9 @@
2929
tuned_enabled: true
3030
tuned_started: true
3131

32-
nfs_client_mnt_point: "/mnt"
33-
nfs_client_mnt_options:
34-
nfs_client_mnt_state: mounted
35-
nfs_configurations:
3632
nfs_enable:
3733
clients: false
3834

39-
# openhpc: no defaults required
40-
4135
os_manila_mount_shares: []
4236
os_manila_mount_ceph_conf_path: /etc/ceph
4337
os_manila_mount_state: mounted
@@ -47,15 +41,8 @@
4741
- noatime
4842
- _netdev # prevents mount blocking early boot before networking available
4943
- rw
50-
51-
basic_users_groups: []
52-
basic_users_manage_homedir: false # homedir must already exist on shared filesystem
53-
basic_users_userdefaults:
54-
state: present
55-
create_home: "{{ basic_users_manage_homedir }}"
56-
generate_ssh_key: "{{ basic_users_manage_homedir }}"
57-
ssh_key_comment: "{{ item.name }}"
58-
basic_users_users: []
44+
- nodev
45+
- nosuid
5946

6047
tasks:
6148
- block:
@@ -96,6 +83,7 @@
9683
when: _mount_mnt_cluster.failed
9784

9885
- name: Check if hostvars exist
86+
become_user: slurm
9987
stat:
10088
path: "/mnt/cluster/hostvars/{{ ansible_hostname }}/hostvars.yml"
10189
register: hostvars_stat
@@ -109,17 +97,33 @@
10997
- meta: end_play
11098
when: not hostvars_stat.stat.exists
11199

112-
- name: Load hostvars from NFS
100+
- name: Sync /mnt/cluster to /var/tmp
101+
become_user: slurm
102+
synchronize:
103+
src: "/mnt/cluster/"
104+
dest: "/var/tmp/cluster/"
105+
archive: yes
106+
recursive: yes
107+
108+
- name: Unmount /mnt/cluster after sync
109+
mount:
110+
path: /mnt/cluster
111+
state: unmounted
112+
113+
- name: Load hostvars
113114
# this is higher priority than vars block = normal ansible's hostvars
114115
include_vars:
115-
file: "/mnt/cluster/hostvars/{{ ansible_hostname }}/hostvars.yml" # can't use inventory_hostname
116-
117-
# TODO: should /mnt/cluster now be UNMOUNTED to avoid future hang-ups?
116+
file: "/var/tmp/cluster/hostvars/{{ ansible_hostname }}/hostvars.yml"
118117

119118
- name: Run chrony role
120119
ansible.builtin.include_role:
121120
name: mrlesmithjr.chrony
122-
when: enable_chrony | bool
121+
tasks_from: config_chrony.yml
122+
vars:
123+
# workaround for set_facts.yml:
124+
chrony_config: /etc/chrony.conf
125+
chrony_service: chronyd
126+
when: enable_chrony
123127

124128
- name: Configure resolve.conf
125129
block:
@@ -149,7 +153,7 @@
149153

150154
- name: Copy cluster /etc/hosts
151155
copy:
152-
src: /mnt/cluster/hosts
156+
src: /var/tmp/cluster/hosts
153157
dest: /etc/hosts
154158
owner: root
155159
group: root
@@ -160,14 +164,14 @@
160164
ansible.builtin.include_role:
161165
name: cacerts
162166
vars:
163-
cacerts_cert_dir: "/mnt/cluster/cacerts"
167+
cacerts_cert_dir: "/var/tmp/cluster/cacerts"
164168
when: enable_cacerts
165169

166170
- name: Configure sshd
167171
ansible.builtin.include_role:
168172
name: sshd
169173
vars:
170-
sshd_conf_src: "/mnt/cluster/hostconfig/{{ ansible_hostname }}/sshd.conf"
174+
sshd_conf_src: "/var/tmp/cluster/hostconfig/{{ ansible_hostname }}/sshd.conf"
171175
when: enable_sshd
172176

173177
- name: Configure tuned
@@ -179,22 +183,24 @@
179183
name: sssd
180184
tasks_from: configure.yml
181185
vars:
182-
sssd_conf_src: "/mnt/cluster/hostconfig/{{ ansible_hostname }}/sssd.conf"
186+
sssd_conf_src: "/var/tmp/cluster/hostconfig/{{ ansible_hostname }}/sssd.conf"
183187
when: enable_sssd
184188

185189
# NFS client mount
186190
- name: If nfs-clients is present
187-
include_tasks: tasks/nfs-clients.yml
191+
ansible.builtin.include_role:
192+
name: stackhpc.nfs
193+
tasks_from: nfs-clients.yml
188194
when:
189195
- enable_nfs
190-
- nfs_enable.clients | bool or ('nfs_enable' in item and item.nfs_enable.clients | bool)
196+
- nfs_enable.clients | default(item.nfs_enable.clients) | bool
191197
loop: "{{ nfs_configurations }}"
192198

193199
- name: Manila mounts
194200
block:
195201
- name: Read manila share info from nfs file
196202
include_vars:
197-
file: /mnt/cluster/manila_share_info.yml
203+
file: /var/tmp/cluster/manila_share_info.yml
198204
no_log: true # contains secrets
199205

200206
- name: Ensure Ceph configuration directory exists
@@ -269,34 +275,15 @@
269275
when: enable_lustre
270276

271277
- name: Basic users
272-
block:
273-
- name: Create groups
274-
ansible.builtin.group: "{{ item }}"
275-
loop: "{{ basic_users_groups }}"
276-
277-
- name: Create users
278-
user: "{{ basic_users_userdefaults | combine(item) | filter_user_params() }}"
279-
loop: "{{ basic_users_users }}"
280-
loop_control:
281-
label: "{{ item.name }} [{{ item.state | default('present') }}]"
282-
register: basic_users_info
283-
284-
- name: Write sudo rules
285-
blockinfile:
286-
path: /etc/sudoers.d/80-{{ item.name}}-user
287-
block: "{{ item.sudo }}"
288-
create: true
289-
loop: "{{ basic_users_users }}"
290-
loop_control:
291-
label: "{{ item.name }}"
292-
when: "'sudo' in item"
278+
ansible.builtin.include_role:
279+
name: basic_users
293280
when: enable_basic_users
294281

295282
- name: EESSI
296283
block:
297284
- name: Copy cvmfs config
298285
copy:
299-
src: /mnt/cluster/cvmfs/default.local
286+
src: /var/tmp/cluster/cvmfs/default.local
300287
dest: /etc/cvmfs/default.local
301288
owner: root
302289
group: root

ansible/roles/compute_init/tasks/export.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
copy:
1313
src: /etc/hosts
1414
dest: /exports/cluster/hosts
15-
owner: root
15+
owner: slurm
1616
group: root
17-
mode: u=rw,go=
17+
mode: u=r,g=rw,o=
1818
remote_src: true
1919
run_once: true
2020
delegate_to: "{{ groups['control'] | first }}"
@@ -41,9 +41,9 @@
4141
copy:
4242
content: "{{ os_manila_mount_share_info_var | to_nice_yaml }}"
4343
dest: /exports/cluster/manila_share_info.yml
44-
owner: root
44+
owner: slurm
4545
group: root
46-
mode: u=rw,g=r
46+
mode: u=r,g=rw,o=
4747
run_once: true
4848
delegate_to: "{{ groups['control'] | first }}"
4949
when: os_manila_mount_share_info is defined
@@ -55,7 +55,7 @@
5555
file:
5656
path: /exports/cluster/cvmfs
5757
state: directory
58-
owner: root
58+
owner: slurm
5959
group: root
6060
mode: 0755
6161
run_once: true
@@ -65,7 +65,7 @@
6565
copy:
6666
src: /etc/cvmfs/default.local
6767
dest: /exports/cluster/cvmfs/default.local
68-
owner: root
68+
owner: slurm
6969
group: root
7070
mode: 0644
7171
remote_src: true
@@ -82,9 +82,9 @@
8282
file:
8383
path: "/exports/cluster/hostconfig/{{ inventory_hostname }}/"
8484
state: directory
85-
owner: root
85+
owner: slurm
8686
group: root
87-
mode: u=rw,go=
87+
mode: u=rX,g=rwX,o=
8888
delegate_to: "{{ groups['control'] | first }}"
8989

9090
- name: Template sssd config

ansible/roles/compute_init/tasks/install.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@
3333
dest: templates/ceph.keyring.j2
3434
- src: ../../resolv_conf/files/NetworkManager-dns-none.conf
3535
dest: files/NetworkManager-dns-none.conf
36-
- src: ../../basic_users/filter_plugins/filter_keys.py
37-
dest: filter_plugins/filter_keys.py
36+
- src: ../../basic_users
37+
dest: roles/
3838
- src: ../../cacerts
3939
dest: roles/
4040
- src: ../../sssd
@@ -43,8 +43,8 @@
4343
dest: roles/
4444
- src: ../../tuned/tasks/configure.yml
4545
dest: tasks/tuned.yml
46-
- src: ../../stackhpc.nfs/tasks/nfs-clients.yml
47-
dest: tasks/nfs-clients.yml
46+
- src: ../../stackhpc.nfs
47+
dest: roles/
4848
- src: ../../mrlesmithjr.chrony
4949
dest: roles/
5050
- src: ../../lustre

environments/.stackhpc/inventory/group_vars/all/nfs.yml

Lines changed: 0 additions & 17 deletions
This file was deleted.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"cluster_image": {
3-
"RL8": "openhpc-RL8-250312-1522-7e5c051d",
4-
"RL9": "openhpc-RL9-250312-1435-7e5c051d"
3+
"RL8": "openhpc-RL8-250319-1045-69713f23",
4+
"RL9": "openhpc-RL9-250319-1045-69713f23"
55
}
66
}

environments/common/inventory/group_vars/all/ansible_init.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ansible_init_wait: 1200 # seconds
1+
ansible_init_wait: 300 # seconds
22

33
ansible_init_pip_packages:
44
# role defaults:

environments/common/inventory/group_vars/all/basic_users.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,7 @@
33
# See ansible/roles/basic_users/README.md for variable definitions.
44

55
basic_users_users: []
6+
7+
# The following are defined for the purpose of compute-init
8+
basic_users_homedir_server: "{{ groups['control'] | first }}"
9+
basic_users_homedir_client: "{{ groups['login'] | first }}"

environments/common/inventory/group_vars/all/nfs.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,13 @@ nfs_configurations:
2828
# NB: this is set as default for all shares above but is repeated here
2929
# in case nfs_export_clients is overridden
3030
nfs_export_clients: "{{ _nfs_node_ips }}"
31+
32+
- comment: Export /exports/cluster from Slurm control node
33+
nfs_enable:
34+
server: "{{ inventory_hostname in groups['control'] }}"
35+
clients: false
36+
nfs_export: "/exports/cluster"
37+
# prevent non-cluster IPs mounting the share:
38+
# NB: this is set as default for all shares above but is repeated here
39+
# in case nfs_export_clients is overridden
40+
nfs_export_clients: "{{ _nfs_node_ips }}"

0 commit comments

Comments
 (0)