File tree Expand file tree Collapse file tree 8 files changed +61
-0
lines changed
environments/common/inventory Expand file tree Collapse file tree 8 files changed +61
-0
lines changed Original file line number Diff line number Diff line change 48
48
name : cuda
49
49
tasks_from : " {{ 'runtime.yml' if appliances_mode == 'configure' else 'install.yml' }}"
50
50
51
+ - name : Setup vGPU
52
+ hosts : vgpu
53
+ become : yes
54
+ gather_facts : yes
55
+ tags : vgpu
56
+ tasks :
57
+ - include_role :
58
+ name : stackhpc.linux.vgpu
59
+ tasks_from : " {{ 'configure.yml' if appliances_mode == 'configure' else 'install.yml' }}"  # configure.yml when appliances_mode is 'configure', otherwise install.yml
60
+ handlers :  # defines a "reboot" handler that only fails with the message below
61
+ - name : reboot
62
+ fail :
63
+ msg : Reboot handler fired unexpectedly. This was supposed to be unreachable.
64
+
51
65
- name : Persist hostkeys across rebuilds
52
66
# Must be after filesystems.yml (for storage)
53
67
# and before portal.yml (where OOD login node hostkeys are scanned)
Original file line number Diff line number Diff line change @@ -75,6 +75,7 @@ it also requires an image build with the role name added to the
75
75
| extras.yml | basic_users | All functionality [ 6] | No |
76
76
| extras.yml | eessi | All functionality [ 7] | No |
77
77
| extras.yml | cuda | None required - use image build | Yes [ 8] |
78
+ | extras.yml | vgpu | All functionality | Yes |
78
79
| extras.yml | persist_hostkeys | Not relevant for compute nodes | n/a |
79
80
| extras.yml | compute_init (export) | Not relevant for compute nodes | n/a |
80
81
| extras.yml | k9s (install) | Not relevant during boot | n/a |
Original file line number Diff line number Diff line change 19
19
enable_basic_users : " {{ os_metadata.meta.basic_users | default(false) | bool }}"
20
20
enable_eessi : " {{ os_metadata.meta.eessi | default(false) | bool }}"
21
21
enable_chrony : " {{ os_metadata.meta.chrony | default(false) | bool }}"
22
+ enable_vgpu : " {{ os_metadata.meta.vgpu | default(false) | bool }}"  # read the vgpu metadata key (not chrony); false when unset
23
+
22
24
23
25
# TODO: "= role defaults" - could be moved to a vars_file: on play with similar precedence effects
24
26
resolv_conf_nameservers : []
295
297
cmd : " cvmfs_config setup"
296
298
when : enable_eessi
297
299
300
+ - name : Configure VGPUs
301
+ include_role :
302
+ name : stackhpc.linux.vgpu
303
+ tasks_from : ' configure.yml'  # include only the role's configure.yml task file
304
+ when : enable_vgpu  # skipped unless enable_vgpu evaluates to true
305
+
298
306
# NB: don't need conditional block on enable_compute as have already exited
299
307
# if not the case
300
308
- name : Write Munge key
Original file line number Diff line number Diff line change 83
83
- import_role :
84
84
name : lustre
85
85
tasks_from : validate.yml
86
+
87
+ - name : Validate vGPU configuration
88
+ hosts : vgpu  # play is limited to hosts matching "vgpu"
89
+ become : yes
90
+ gather_facts : yes
91
+ tags : vgpu
92
+ tasks :
93
+ - include_role :
94
+ name : stackhpc.linux.vgpu
95
+ tasks_from : validate.yml  # include only the role's validate.yml task file
Original file line number Diff line number Diff line change
1
+ # vGPU/MIG configuration
2
+
3
+ Use variables from the [ stackhpc.linux.vgpu] ( https://github.com/stackhpc/ansible-collection-linux/tree/main/roles/vgpu ) role.
4
+
5
+ For example, in ` environments/<environment>/inventory/group_vars/all/vgpu ` :
6
+
7
+ ```
8
+ ---
9
+ vgpu_definitions:
10
+ - pci_address: "0000:17:00.0"
11
+ mig_devices:
12
+ "1g.10gb": 1
13
+ "2g.20gb": 3
14
+ ```
15
+
16
+ The appliance will use the driver installed via the `` cuda `` role. Use `` lspci `` to determine the PCI
17
+ addresses.
Original file line number Diff line number Diff line change
1
+ ---
2
+
3
+ # The NVIDIA driver is provided by the cuda role, so this driver-install flag is set to false.
4
+ vgpu_nvidia_driver_install_enabled: false
Original file line number Diff line number Diff line change @@ -112,6 +112,10 @@ freeipa_client
112
112
[cuda]
113
113
# Hosts to install NVIDIA CUDA on - see ansible/roles/cuda/README.md
114
114
115
+ [vgpu]
116
+ # FIXME: Update once PR merged
117
+ # Hosts where vGPU/MIG should be configured - see https://github.com/stackhpc/ansible-collection-linux/pull/43/files#diff-74e43d9a34244aa54721f4dbd12a029baa87957afd762b88c2677aa75414f514R75
118
+
115
119
[eessi]
116
120
# Hosts on which EESSI stack should be configured
117
121
Original file line number Diff line number Diff line change @@ -55,4 +55,7 @@ collections:
55
55
version : 0.0.15
56
56
- name : stackhpc.pulp
57
57
version : 0.5.5
58
+ - name : https://github.com/stackhpc/ansible-collection-linux
59
+ type : git
60
+ version : feature/mig-only  # NOTE(review): looks like a branch name rather than a pinned release - confirm and pin to a tag or commit before merging
58
61
...
You can’t perform that action at this time.
0 commit comments