Skip to content

Commit a43a5f9

Browse files
committed
fail gracefully when NFS server not up
1 parent 07ed822 commit a43a5f9

File tree

1 file changed

+37
-16
lines changed

1 file changed

+37
-16
lines changed

ansible/roles/compute_init/files/compute-init.yml

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,13 @@
9191
fstype: nfs
9292
opts: rw,sync
9393
state: mounted
94+
register: nfs_mount_result
95+
ignore_errors: true
96+
97+
# Print a diagnostic when the preceding NFS mount did not succeed. That mount
# task registers its result as nfs_mount_result and sets ignore_errors, so the
# play continues past a failed mount and this message explains why later
# steps are skipped.
- name: Fail gracefully if NFS mount is not available
98+
debug:
99+
msg: "NFS mount failed. Skipping compute initialization. Re-image if this persists."
100+
when: nfs_mount_result.failed
94101

95102
- name: Copy /mnt/cluster/hosts contents to /etc/hosts
96103
copy:
@@ -99,20 +106,24 @@
99106
owner: root
100107
group: root
101108
# Quoted so the file mode is passed as an octal string, not a YAML-typed integer.
mode: "0644"
109+
when: not nfs_mount_result.failed
102110
when: iam_slurm_compute | bool
103111

104-
# - name: Include hostvars from NFS share
105-
# block:
106-
# - name: Extract short hostname using a shell block
107-
# shell: |
108-
# HOSTNAME=$(hostname)
109-
# echo "${HOSTNAME%.test.invalid}"
110-
# register: short_hostname
111112

112-
# - name: Include vars from NFS mount
113-
# include_vars:
114-
# file: "/mnt/cluster/{{ short_hostname.stdout }}/hostvars.yml"
115-
# when: iam_slurm_compute | bool
113+
# Load per-host variables from the NFS share:
#   1. run `hostname` and strip a trailing ".test.invalid" from it, registering
#      the command result as short_hostname;
#   2. include /mnt/cluster/<short hostname>/hostvars.yml as variables.
# Guarded by two conditions: iam_slurm_compute is truthy and the earlier NFS
# mount (nfs_mount_result) did not fail.
# NOTE(review): the shell task only reads the hostname; consider marking it
# `changed_when: false` so it does not report a change on every run.
# NOTE(review): indentation is not visible here; the `when:` list presumably
# applies to the whole block rather than only the last task - confirm.
- name: Include hostvars from NFS share
114+
block:
115+
- name: Extract short hostname using a shell block
116+
shell: |
117+
HOSTNAME=$(hostname)
118+
echo "${HOSTNAME%.test.invalid}"
119+
register: short_hostname
120+
121+
# Reads the variables file from the directory named after the short hostname.
- name: Include vars from NFS mount
122+
include_vars:
123+
file: "/mnt/cluster/{{ short_hostname.stdout }}/hostvars.yml"
124+
when:
125+
- iam_slurm_compute | bool
126+
- not nfs_mount_result.failed
116127

117128

118129
- name: NFS client mount
@@ -131,7 +142,9 @@
131142
fstype: nfs
132143
state: "{{ item.get('nfs_client_mnt_state', nfs_client_mnt_state) }}"
133144
loop: "{{ nfs_configurations }}"
134-
when: iam_slurm_compute | bool
145+
when:
146+
- iam_slurm_compute | bool
147+
- not nfs_mount_result.failed
135148

136149

137150
- name: Manila mount
@@ -216,7 +229,9 @@
216229
loop_control:
217230
label: "{{ item.share_name }}"
218231
# Run for shares in either state; both values must be separate list elements.
when: item.mount_state | default(os_manila_mount_state) in ['mounted', 'ephemeral']
219-
when: iam_slurm_compute | bool
232+
when:
233+
- iam_slurm_compute | bool
234+
- not nfs_mount_result.failed
220235

221236

222237
- name: Basic users setup
@@ -241,7 +256,9 @@
241256
loop_control:
242257
label: "{{ item.name }}"
243258
when: "'sudo' in item"
244-
when: iam_slurm_compute | bool
259+
when:
260+
- iam_slurm_compute | bool
261+
- not nfs_mount_result.failed
245262

246263

247264
- name: Configure EESSI
@@ -258,7 +275,9 @@
258275
- name: Ensure CVMFS config is setup
259276
command:
260277
cmd: "cvmfs_config setup"
261-
when: iam_slurm_compute | bool
278+
when:
279+
- iam_slurm_compute | bool
280+
- not nfs_mount_result.failed
262281

263282

264283
- name: Configure openhpc
@@ -300,4 +319,6 @@
300319
name: slurmd
301320
enabled: true
302321
state: started
303-
when: iam_slurm_compute | bool
322+
when:
323+
- iam_slurm_compute | bool
324+
- not nfs_mount_result.failed

0 commit comments

Comments
 (0)