File tree Expand file tree Collapse file tree 5 files changed +45
-19
lines changed Expand file tree Collapse file tree 5 files changed +45
-19
lines changed Original file line number Diff line number Diff line change @@ -90,3 +90,6 @@ roles/*
90
90
! roles /gateway /**
91
91
! roles /alertmanager /
92
92
! roles /alertmanager /**
93
+ ! roles /slurm_recompile /
94
+ ! roles /slurm_recompile /**
95
+
Original file line number Diff line number Diff line change 254
254
hosts : cuda
255
255
become : yes
256
256
tasks :
257
- - name : Get facts about CUDA installation
258
- import_role : cuda
259
- tasks_from : facts.yml
260
-
261
- - name : Recompile and install slurm packages
262
- shell : |
263
- #!/bin/bash
264
- set -eux
265
- dnf download -y --source slurm-slurmd-ohpc
266
- rpm -i slurm-ohpc-*.src.rpm
267
- dnf install -y @'Development Tools'
268
- cd /root/rpmbuild/SPECS
269
- dnf builddep -y slurm.spec
270
- rpmbuild -bb -D "_with_nvml --with-nvml=/usr/local/cuda-{{ cuda_facts_version_short }}/targets/x86_64-linux/" slurm.spec
271
- dnf reinstall -y /root/rpmbuild/RPMS/x86_64/*.rpm
272
- # Workaround path issue: https://groups.google.com/g/slurm-users/c/cvGb4JnK8BY
273
- if [[ -e /lib64/libnvidia-ml.so ]]; then
274
- ln -s /lib64/libnvidia-ml.so.1 /lib64/libnvidia-ml.so
275
- fi
257
+ - name : Recompile slurm
258
+ import_role :
259
+ name : slurm_recompile  # role directory is roles/slurm_recompile
260
+ vars :
261
+ slurm_recompile_nvml : "{{ groups.cuda | length > 0 }}"  # variable name the role actually reads
276
262
277
263
- name : Run post.yml hook
278
264
vars :
Original file line number Diff line number Diff line change
1
+ ---
2
+ # Re-export cuda_version_short as the fact cuda_facts_version_short.
3
+
4
+ - name: Set cuda_facts_version_short
5
+   set_fact:
6
+     # No whitespace inside the quotes: the value is spliced into a filesystem path.
7
+     cuda_facts_version_short: "{{ cuda_version_short }}"
Original file line number Diff line number Diff line change
1
+ ---
2
+ # When true, the slurm rebuild passes the "_with_nvml" define to rpmbuild.
3
+ slurm_recompile_nvml: false
Original file line number Diff line number Diff line change
1
+ ---
2
+ # Rebuild the OpenHPC slurm RPMs from the source RPM and reinstall them,
3
+ # optionally with NVML support when slurm_recompile_nvml is true.
4
+
5
+ - name: Get facts about CUDA installation
6
+   # Presumably sets cuda_facts_version_short, which is only read by the NVML
7
+   # build option below, so it is skipped for non-NVML builds. TODO confirm.
8
+   import_role:
9
+     name: cuda
10
+     tasks_from: facts.yml
11
+   when: slurm_recompile_nvml | bool
12
+
13
+ - name: Recompile and install slurm packages
14
+   shell: |
15
+     set -eux
16
+     dnf download -y --source slurm-slurmd-ohpc
17
+     rpm -i slurm-ohpc-*.src.rpm
18
+     cd /root/rpmbuild/SPECS
19
+     dnf builddep -y slurm.spec
20
+     rpmbuild -bb{% if slurm_recompile_nvml | bool %} -D "_with_nvml --with-nvml=/usr/local/cuda-{{ cuda_facts_version_short }}/targets/x86_64-linux/"{% endif %} slurm.spec
21
+     dnf reinstall -y /root/rpmbuild/RPMS/x86_64/*.rpm
22
+   args:
23
+     # A '#!/bin/bash' first line is only a comment to the shell module;
24
+     # select bash explicitly instead.
25
+     executable: /bin/bash
26
+   become: true
27
+
28
+ - name: Workaround missing symlink
29
+   # Workaround path issue: https://groups.google.com/g/slurm-users/c/cvGb4JnK8BY
30
+   # Only relevant to NVML builds; otherwise the link target may not exist.
31
+   command: ln -s /lib64/libnvidia-ml.so.1 /lib64/libnvidia-ml.so
32
+   args:
33
+     creates: /lib64/libnvidia-ml.so
34
+   become: true
35
+   when: slurm_recompile_nvml | bool
36
+
37
+ - name: Cleanup Dependencies
38
+   # Undo the first listed dnf transaction created by "dnf builddep"; a no-op
39
+   # when builddep installed nothing and therefore left no history entry.
40
+   shell: |
41
+     set -eux
42
+     set -o pipefail
43
+     # awk reads all of its input (no early exit) so pipefail never sees SIGPIPE.
44
+     txn_id="$(dnf history list | awk '/Install/ && /builddep -y slurm.spec/ && !seen { print $1; seen = 1 }')"
45
+     if [[ -n "${txn_id}" ]]; then
46
+       dnf history -y undo "${txn_id}"
47
+     fi
48
+   args:
49
+     executable: /bin/bash
50
+   become: true
You can’t perform that action at this time.
0 commit comments