diff --git a/ansible/roles/compute_init/files/compute-init.yml b/ansible/roles/compute_init/files/compute-init.yml
index b09bd7f3b..25af01154 100644
--- a/ansible/roles/compute_init/files/compute-init.yml
+++ b/ansible/roles/compute_init/files/compute-init.yml
@@ -61,7 +61,15 @@
         owner: slurm
         group: root
         mode: u=rX,g=rwX,o=
-    
+
+    # Block until the NFS server accepts TCP connections on 2049 (the NFS
+    # port) so the mount below is not attempted before networking is up.
+    - name: Wait for NFS to be reachable (checks host network up)
+      ansible.builtin.wait_for:
+        port: 2049
+        host: '{{ server_node_ip }}'
+        timeout: 120
+
     - name: Mount /mnt/cluster
       mount:
         path: /mnt/cluster
@@ -70,8 +78,6 @@
         opts: ro,sync
         state: mounted
       register: _mount_mnt_cluster
-      ignore_errors: true
-      # TODO: add some retries here?
 
     - block:
         - name: Report skipping initialization if cannot mount nfs
diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
index 7e213c00c..01f61fa0a 100644
--- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
+++ b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json
@@ -1,6 +1,6 @@
 {
     "cluster_image": {
-        "RL8": "openhpc-RL8-250326-1048-3e132168",
-        "RL9": "openhpc-RL9-250326-1049-3e132168"
+        "RL8": "openhpc-RL8-250331-1627-cccd6c9c",
+        "RL9": "openhpc-RL9-250331-1627-cccd6c9c"
     }
 }