diff --git a/ansible/roles/compute_init/README.md b/ansible/roles/compute_init/README.md index 16f0987e9..e64ea6ffb 100644 --- a/ansible/roles/compute_init/README.md +++ b/ansible/roles/compute_init/README.md @@ -151,7 +151,11 @@ a new image: 3. Add metadata to a compute node e.g. via Horizon to turn on compute-init playbook functionality. -4. Fake an image build to deploy the compute-init playbook: +4. Stop ansible-init from running: + + ansible all -ba "systemctl stop ansible-init" + +5. Fake an image build to deploy the compute-init playbook: ansible-playbook ansible/fatimage.yml --tags compute_init @@ -159,16 +163,13 @@ a new image: in the builder group, which conveniently means any changes made to that play also get picked up. -5. Fake a reimage of compute to run ansible-init and the compute-init playbook: - - On compute node where metadata was added: +6. Fake a reimage of compute to run ansible-init and the updated compute-init playbook: - [root@rl9-compute-0 rocky]# rm -f /var/lib/ansible-init.done && systemctl restart ansible-init - [root@rl9-compute-0 rocky]# systemctl status ansible-init + ansible all -ba "rm -f /var/lib/ansible-init.done && systemctl restart ansible-init" Use `systemctl status ansible-init` to view stdout/stderr from Ansible. -Steps 4/5 can be repeated with changes to the compute script. If required, +Steps 4/5/6 can be repeated with changes to the compute script. If required, reimage the compute node(s) first as in step 2 and/or add additional metadata as in step 3.
diff --git a/ansible/roles/compute_init/files/compute-init.yml b/ansible/roles/compute_init/files/compute-init.yml index 906961c96..bf486f5b2 100644 --- a/ansible/roles/compute_init/files/compute-init.yml +++ b/ansible/roles/compute_init/files/compute-init.yml @@ -95,6 +95,20 @@ - meta: end_play when: _mount_mnt_cluster.failed + - name: Check if hostvars exist + stat: + path: "/mnt/cluster/hostvars/{{ ansible_hostname }}/hostvars.yml" + register: hostvars_stat + + - block: + - name: Report skipping initialization if host vars does not exist + # meta: end_play produces no output + debug: + msg: "Skipping compute initialization: hostvars does not exist" + + - meta: end_play + when: not hostvars_stat.stat.exists + - name: Load hostvars from NFS # this is higher priority than vars block = normal ansible's hostvars include_vars: diff --git a/ansible/roles/compute_init/tasks/install.yml b/ansible/roles/compute_init/tasks/install.yml index cbacb062e..8288b65fe 100644 --- a/ansible/roles/compute_init/tasks/install.yml +++ b/ansible/roles/compute_init/tasks/install.yml @@ -16,12 +16,14 @@ - roles - name: Inject files from roles - copy: + synchronize: src: '{{ item.src }}' dest: '/etc/ansible-init/playbooks/{{ item.dest }}' - owner: root - group: root - mode: 0644 + archive: false + rsync_opts: ["-p", "--chmod=D770,F644", "--owner=root", "--group=root"] + recursive: true + use_ssh_args: true + become: true loop: - src: ../../resolv_conf/templates/resolv.conf.j2 dest: templates/resolv.conf.j2 @@ -60,7 +62,7 @@ - name: Add compute initialisation playbook copy: src: compute-init.yml - dest: /etc/ansible-init/playbooks/1-compute-init.yml + dest: /etc/ansible-init/playbooks/10-compute-init.yml owner: root group: root mode: 0644 diff --git a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json index 3e8293206..0af12befc 100644 --- a/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json +++ 
b/environments/.stackhpc/tofu/cluster_image.auto.tfvars.json @@ -1,6 +1,6 @@ { "cluster_image": { - "RL8": "openhpc-RL8-250305-1110-534ed276", - "RL9": "openhpc-RL9-250305-1110-534ed276" + "RL8": "openhpc-RL8-250311-1020-d05208bc", + "RL9": "openhpc-RL9-250311-1020-d05208bc" } }