Skip to content

Commit c9ebd48

Browse files
committed
compute-init stage 1 working
1 parent 1a400db commit c9ebd48

File tree

2 files changed

+101
-1
lines changed

2 files changed

+101
-1
lines changed

ansible/extras.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
tasks_from: export.yml
4949

5050
# TODO: really this should only run during build
51-
# but handy not to for debugging
51+
# but it is handy to be able to run it outside a build, for debugging
5252
- name: Install compute_init script
5353
hosts: compute_init
5454
tags: compute_init

docs/experimental/compute-init.md

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
2+
To develop/debug this without actually having to build an image:
3+
4+
On deploy host:
5+
6+
.stackhpc/ (venv) [rocky@steveb-dev slurm-app-rl9]$ ansible-playbook ansible/extras.yml --tags compute_init
7+
8+
On compute node:
9+
10+
[root@rl9-compute-0 rocky]# rm /var/lib/ansible-init.done
11+
[root@rl9-compute-0 rocky]# systemctl restart ansible-init
12+
[root@rl9-compute-0 rocky]# systemctl status ansible-init
13+
14+
15+
Without any metadata:
16+
17+
[root@rl9-compute-0 rocky]# systemctl status ansible-init
18+
● ansible-init.service
19+
Loaded: loaded (/etc/systemd/system/ansible-init.service; enabled; preset: disabled)
20+
Active: activating (start) since Fri 2024-12-13 20:41:16 UTC; 1min 45s ago
21+
Main PID: 16089 (ansible-init)
22+
Tasks: 8 (limit: 10912)
23+
Memory: 99.5M
24+
CPU: 11.687s
25+
CGroup: /system.slice/ansible-init.service
26+
├─16089 /usr/lib/ansible-init/bin/python /usr/bin/ansible-init
27+
├─16273 /usr/lib/ansible-init/bin/python3.9 /usr/lib/ansible-init/bin/ansible-playbook --connection local --inventory 127.0.0.1, /etc/ansible-init/playbooks/1-compute-init.yml
28+
├─16350 /usr/lib/ansible-init/bin/python3.9 /usr/lib/ansible-init/bin/ansible-playbook --connection local --inventory 127.0.0.1, /etc/ansible-init/playbooks/1-compute-init.yml
29+
├─16361 /bin/sh -c "/usr/bin/python3 /root/.ansible/tmp/ansible-tmp-1734122485.9542894-16350-45936546411977/AnsiballZ_mount.py && sleep 0"
30+
├─16362 /usr/bin/python3 /root/.ansible/tmp/ansible-tmp-1734122485.9542894-16350-45936546411977/AnsiballZ_mount.py
31+
├─16363 /usr/bin/mount /mnt/cluster
32+
└─16364 /sbin/mount.nfs 192.168.10.12:/exports/cluster /mnt/cluster -o ro,sync
33+
34+
Dec 13 20:41:24 rl9-compute-0.rl9.invalid ansible-init[16273]: ok: [127.0.0.1]
35+
Dec 13 20:41:24 rl9-compute-0.rl9.invalid ansible-init[16273]: TASK [Report skipping initialization if not compute node] **********************
36+
Dec 13 20:41:25 rl9-compute-0.rl9.invalid ansible-init[16273]: skipping: [127.0.0.1]
37+
Dec 13 20:41:25 rl9-compute-0.rl9.invalid ansible-init[16273]: TASK [meta] ********************************************************************
38+
Dec 13 20:41:25 rl9-compute-0.rl9.invalid ansible-init[16273]: skipping: [127.0.0.1]
39+
Dec 13 20:41:25 rl9-compute-0.rl9.invalid ansible-init[16273]: TASK [Ensure the mount directory exists] ***************************************
40+
Dec 13 20:41:25 rl9-compute-0.rl9.invalid python3[16346]: ansible-file Invoked with path=/mnt/cluster state=directory owner=root group=root mode=u=rwX,go= recurse=False force=False follow=True modification_time_format=%Y%m%d%H%M.%S access>
41+
Dec 13 20:41:25 rl9-compute-0.rl9.invalid ansible-init[16273]: changed: [127.0.0.1]
42+
Dec 13 20:41:25 rl9-compute-0.rl9.invalid ansible-init[16273]: TASK [Mount /mnt/cluster] ******************************************************
43+
Dec 13 20:41:26 rl9-compute-0.rl9.invalid python3[16362]: ansible-mount Invoked with path=/mnt/cluster src=192.168.10.12:/exports/cluster fstype=nfs opts=ro,sync state=mounted boot=True dump=0 passno=0 backup=False fstab=None
44+
[root@rl9-compute-0 rocky]# systemctl status ansible-init
45+
46+
Added metadata via Horizon:
47+
48+
compute_groups ["compute"]
49+
50+
51+
OK:
52+
53+
[root@rl9-compute-0 rocky]# systemctl status ansible-init
54+
● ansible-init.service
55+
Loaded: loaded (/etc/systemd/system/ansible-init.service; enabled; preset: disabled)
56+
Active: active (exited) since Fri 2024-12-13 20:43:31 UTC; 33s ago
57+
Process: 16089 ExecStart=/usr/bin/ansible-init (code=exited, status=0/SUCCESS)
58+
Main PID: 16089 (code=exited, status=0/SUCCESS)
59+
CPU: 13.003s
60+
61+
Dec 13 20:43:31 rl9-compute-0.rl9.invalid ansible-init[16273]: ok: [127.0.0.1] => {
62+
Dec 13 20:43:31 rl9-compute-0.rl9.invalid ansible-init[16273]: "msg": "Skipping compute initialization as cannot mount exports/cluster share"
63+
Dec 13 20:43:31 rl9-compute-0.rl9.invalid ansible-init[16273]: }
64+
Dec 13 20:43:31 rl9-compute-0.rl9.invalid ansible-init[16273]: TASK [meta] ********************************************************************
65+
Dec 13 20:43:31 rl9-compute-0.rl9.invalid ansible-init[16273]: PLAY RECAP *********************************************************************
66+
Dec 13 20:43:31 rl9-compute-0.rl9.invalid ansible-init[16273]: 127.0.0.1 : ok=4 changed=1 unreachable=0 failed=0 skipped=1 rescued=0 ignored=1
67+
Dec 13 20:43:31 rl9-compute-0.rl9.invalid ansible-init[16089]: [INFO] executing remote playbooks for stage - post
68+
Dec 13 20:43:31 rl9-compute-0.rl9.invalid ansible-init[16089]: [INFO] writing sentinel file /var/lib/ansible-init.done
69+
Dec 13 20:43:31 rl9-compute-0.rl9.invalid ansible-init[16089]: [INFO] ansible-init completed successfully
70+
Dec 13 20:43:31 rl9-compute-0.rl9.invalid systemd[1]: Finished ansible-init.service.
71+
72+
Now run site.yml, then restart ansible-init again:
73+
74+
75+
[root@rl9-compute-0 rocky]# systemctl status ansible-init
76+
● ansible-init.service
77+
Loaded: loaded (/etc/systemd/system/ansible-init.service; enabled; preset: disabled)
78+
Active: active (exited) since Fri 2024-12-13 20:50:10 UTC; 11s ago
79+
Process: 18921 ExecStart=/usr/bin/ansible-init (code=exited, status=0/SUCCESS)
80+
Main PID: 18921 (code=exited, status=0/SUCCESS)
81+
CPU: 8.240s
82+
83+
Dec 13 20:50:10 rl9-compute-0.rl9.invalid ansible-init[19110]: TASK [Report skipping initialization if cannot mount nfs] **********************
84+
Dec 13 20:50:10 rl9-compute-0.rl9.invalid ansible-init[19110]: skipping: [127.0.0.1]
85+
Dec 13 20:50:10 rl9-compute-0.rl9.invalid ansible-init[19110]: TASK [meta] ********************************************************************
86+
Dec 13 20:50:10 rl9-compute-0.rl9.invalid ansible-init[19110]: skipping: [127.0.0.1]
87+
Dec 13 20:50:10 rl9-compute-0.rl9.invalid ansible-init[19110]: PLAY RECAP *********************************************************************
88+
Dec 13 20:50:10 rl9-compute-0.rl9.invalid ansible-init[19110]: 127.0.0.1 : ok=3 changed=1 unreachable=0 failed=0 skipped=2 rescued=0 ignored=0
89+
Dec 13 20:50:10 rl9-compute-0.rl9.invalid ansible-init[18921]: [INFO] executing remote playbooks for stage - post
90+
Dec 13 20:50:10 rl9-compute-0.rl9.invalid ansible-init[18921]: [INFO] writing sentinel file /var/lib/ansible-init.done
91+
Dec 13 20:50:10 rl9-compute-0.rl9.invalid ansible-init[18921]: [INFO] ansible-init completed successfully
92+
Dec 13 20:50:10 rl9-compute-0.rl9.invalid systemd[1]: Finished ansible-init.service.
93+
[root@rl9-compute-0 rocky]# ls /mnt/cluster/host
94+
hosts hostvars/
95+
[root@rl9-compute-0 rocky]# ls /mnt/cluster/hostvars/rl9-compute-
96+
rl9-compute-0/ rl9-compute-1/
97+
[root@rl9-compute-0 rocky]# ls /mnt/cluster/hostvars/rl9-compute-
98+
rl9-compute-0/ rl9-compute-1/
99+
[root@rl9-compute-0 rocky]# ls /mnt/cluster/hostvars/rl9-compute-0/
100+
hostvars.yml

0 commit comments

Comments
 (0)