9191 fstype : nfs
9292 opts : rw,sync
9393 state : mounted
94+ register : nfs_mount_result
95+ ignore_errors : true
96+
# Print a notice when the registered NFS mount result is marked failed.
# NOTE(review): `debug` only reports; it does not stop the play. Later tasks
# that need the share must carry their own mount-result guard.
- name: Fail gracefully if NFS mount is not available
  debug:
    msg: " NFS mount failed. Skipping compute initialization. Re-image if this persists."
  # The `failed` test treats a result without a `failed` key as not failed,
  # whereas attribute access (`.failed`) errors on a missing key.
  when: nfs_mount_result is failed
94101
95102 - name : Copy /mnt/cluster/hosts contents to /etc/hosts
96103 copy :
99106 owner : root
100107 group : root
101108 mode : 0644
109+ when : not nfs_mount_result.failed
102110 when : iam_slurm_compute | bool
103111
104- # - name: Include hostvars from NFS share
105- # block:
106- # - name: Extract short hostname using a shell block
107- # shell: |
108- # HOSTNAME=$(hostname)
109- # echo "${HOSTNAME%.test.invalid}"
110- # register: short_hostname
111112
112- # - name: Include vars from NFS mount
113- # include_vars:
114- # file: "/mnt/cluster/{{ short_hostname.stdout }}/hostvars.yml"
115- # when: iam_slurm_compute | bool
# Load this node's variables from
# /mnt/cluster/<short hostname>/hostvars.yml, where the short hostname is the
# output of `hostname` with a trailing ".test.invalid" removed.
- name: Include hostvars from NFS share
  block:
    - name: Extract short hostname using a shell block
      shell: |
        HOSTNAME=$(hostname)
        echo "${HOSTNAME%.test.invalid}"
      register: short_hostname
      # Read-only lookup: do not report the host as changed.
      changed_when: false

    - name: Include vars from NFS mount
      include_vars:
        # No whitespace inside the quotes: the value is used verbatim as a path.
        file: "/mnt/cluster/{{ short_hostname.stdout }}/hostvars.yml"
  # Both conditions must hold. The `failed` test tolerates a registered
  # result that has no `failed` key.
  # NOTE(review): guard placed at block level to match the sibling blocks;
  # confirm against the original indentation.
  when:
    - iam_slurm_compute | bool
    - nfs_mount_result is not failed
116127
117128
118129 - name : NFS client mount
131142 fstype : nfs
132143 state : " {{ item.get('nfs_client_mnt_state', nfs_client_mnt_state) }}"
133144 loop : " {{ nfs_configurations }}"
134- when : iam_slurm_compute | bool
145+ when :
146+ - iam_slurm_compute | bool
147+ - not nfs_mount_result.failed
135148
136149
137150 - name : Manila mount
216229 loop_control :
217230 label : " {{ item.share_name }}"
# List both accepted states. The previous `['mounted' or 'ephemeral']`
# evaluated to `['mounted']`, so 'ephemeral' never matched.
when: item.mount_state | default(os_manila_mount_state) in ['mounted', 'ephemeral']
219- when : iam_slurm_compute | bool
232+ when :
233+ - iam_slurm_compute | bool
234+ - not nfs_mount_result.failed
220235
221236
222237 - name : Basic users setup
241256 loop_control :
242257 label : " {{ item.name }}"
243258 when : " 'sudo' in item"
244- when : iam_slurm_compute | bool
259+ when :
260+ - iam_slurm_compute | bool
261+ - not nfs_mount_result.failed
245262
246263
247264 - name : Configure EESSI
258275 - name : Ensure CVMFS config is setup
259276 command :
260277 cmd : " cvmfs_config setup"
261- when : iam_slurm_compute | bool
278+ when :
279+ - iam_slurm_compute | bool
280+ - not nfs_mount_result.failed
262281
263282
264283 - name : Configure openhpc
300319 name : slurmd
301320 enabled : true
302321 state : started
303- when : iam_slurm_compute | bool
322+ when :
323+ - iam_slurm_compute | bool
324+ - not nfs_mount_result.failed
0 commit comments