2 changes: 2 additions & 0 deletions ansible/.gitignore
@@ -90,3 +90,5 @@ roles/*
!roles/gateway/**
!roles/alertmanager/
!roles/alertmanager/**
!roles/slurm_recompile/
!roles/slurm_recompile/**
14 changes: 14 additions & 0 deletions ansible/extras.yml
@@ -48,6 +48,20 @@
name: cuda
tasks_from: "{{ 'runtime.yml' if appliances_mode == 'configure' else 'install.yml' }}"

- name: Setup vGPU
  hosts: vgpu
  become: yes
  gather_facts: yes
  tags: vgpu
  tasks:
    - include_role:
        name: stackhpc.linux.vgpu
        tasks_from: "{{ 'configure.yml' if appliances_mode == 'configure' else 'install.yml' }}"
  handlers:
    - name: reboot
      fail:
        msg: Reboot handler for stackhpc.linux.vgpu role fired unexpectedly. This was supposed to be unreachable.

- name: Persist hostkeys across rebuilds
# Must be after filesystems.yml (for storage)
# and before portal.yml (where OOD login node hostkeys are scanned)
10 changes: 10 additions & 0 deletions ansible/fatimage.yml
@@ -250,6 +250,16 @@
name: cloudalchemy.grafana
tasks_from: install.yml

- name: Add support for NVIDIA GPU auto detection to Slurm
  hosts: cuda
  become: yes
  tasks:
    - name: Recompile slurm
      import_role:
        name: slurm_recompile
      vars:
        slurm_recompile_with_nvml: "{{ groups.cuda | length > 0 }}"

- name: Run post.yml hook
vars:
appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}"
1 change: 1 addition & 0 deletions ansible/roles/compute_init/README.md
@@ -75,6 +75,7 @@ it also requires an image build with the role name added to the
| extras.yml | basic_users | All functionality [6] | No |
| extras.yml | eessi | All functionality [7] | No |
| extras.yml | cuda | None required - use image build | Yes [8] |
| extras.yml | vgpu | All functionality | Yes |
| extras.yml | persist_hostkeys | Not relevant for compute nodes | n/a |
| extras.yml | compute_init (export) | Not relevant for compute nodes | n/a |
| extras.yml | k9s (install) | Not relevant during boot | n/a |
7 changes: 7 additions & 0 deletions ansible/roles/compute_init/files/compute-init.yml
@@ -19,6 +19,7 @@
enable_basic_users: "{{ os_metadata.meta.basic_users | default(false) | bool }}"
enable_eessi: "{{ os_metadata.meta.eessi | default(false) | bool }}"
enable_chrony: "{{ os_metadata.meta.chrony | default(false) | bool }}"
enable_vgpu: "{{ os_metadata.meta.vgpu | default(false) | bool }}"

# TODO: "= role defaults" - could be moved to a vars_file: on play with similar precedence effects
resolv_conf_nameservers: []
@@ -295,6 +296,12 @@
cmd: "cvmfs_config setup"
when: enable_eessi

- name: Configure VGPUs
include_role:
name: stackhpc.linux.vgpu
tasks_from: 'configure.yml'
when: enable_vgpu

# NB: don't need conditional block on enable_compute as have already exited
# if not the case
- name: Write Munge key
4 changes: 4 additions & 0 deletions ansible/roles/cuda/tasks/facts.yml
@@ -0,0 +1,4 @@
---
- name: Set cuda_facts_version_short
  set_fact:
    cuda_facts_version_short: "{{ cuda_version_short }}"
28 changes: 28 additions & 0 deletions ansible/roles/slurm_recompile/README.md
@@ -0,0 +1,28 @@
slurm_recompile
===============

Recompiles Slurm from the OpenHPC source RPM and installs the rebuilt packages.

Requirements
------------

Role Variables
--------------

See `defaults/main.yml`. The key variable is `slurm_recompile_with_nvml` (default `false`), which controls whether Slurm is linked against the NVIDIA Management Library (NVML).

Dependencies
------------

Example Playbook
----------------

    - hosts: compute
      tasks:
        - import_role:
            name: slurm_recompile

License
-------

Apache-2.0
4 changes: 4 additions & 0 deletions ansible/roles/slurm_recompile/defaults/main.yml
@@ -0,0 +1,4 @@
---
# Whether to link slurm against the NVIDIA management library
slurm_recompile_with_nvml: false

41 changes: 41 additions & 0 deletions ansible/roles/slurm_recompile/tasks/main.yml
@@ -0,0 +1,41 @@
---
- name: Get facts about CUDA installation
  import_role:
    name: cuda
    tasks_from: facts.yml

- name: Gather the package facts
  ansible.builtin.package_facts:
    manager: auto

- name: Set fact containing slurm package facts
  set_fact:
    slurm_package: "{{ ansible_facts.packages['slurm-slurmd-ohpc'].0 }}"

- name: Recompile and install slurm packages
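  # Rebuilds the installed slurm-slurmd-ohpc version from its OpenHPC source RPM and reinstalls the resulting packages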
  shell: |
    #!/bin/bash
    source /etc/profile
    set -eux
    dnf download -y --source slurm-slurmd-ohpc-{{ slurm_package.version }}-{{ slurm_package.release }}
    rpm -i slurm-ohpc-*.src.rpm
    cd /root/rpmbuild/SPECS
    dnf builddep -y slurm.spec
    rpmbuild -bb{% if slurm_recompile_with_nvml | bool %} -D "_with_nvml --with-nvml=/usr/local/cuda-{{ cuda_facts_version_short }}/targets/x86_64-linux/"{% endif %} slurm.spec
    dnf reinstall -y /root/rpmbuild/RPMS/x86_64/*.rpm
  become: true

- name: Workaround missing symlink
  # Workaround path issue: https://groups.google.com/g/slurm-users/c/cvGb4JnK8BY
  command: ln -s /lib64/libnvidia-ml.so.1 /lib64/libnvidia-ml.so
  args:
    creates: /lib64/libnvidia-ml.so
  when: slurm_recompile_with_nvml | bool

- name: Cleanup Dependencies
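  # Undo the dnf transaction recorded by 'dnf builddep' above, removing the build-only dependencies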
  shell: |
    #!/bin/bash
    set -eux
    set -o pipefail
    dnf history list | grep Install | grep 'builddep -y slurm.spec' | head -n 1 | awk '{print $1}' | xargs dnf history -y undo
  become: true
10 changes: 10 additions & 0 deletions ansible/validate.yml
@@ -88,3 +88,13 @@
- import_role:
name: lustre
tasks_from: validate.yml

- name: Validate vGPU configuration
  hosts: vgpu
  become: yes
  gather_facts: yes
  tags: vgpu
  tasks:
    - include_role:
        name: stackhpc.linux.vgpu
        tasks_from: validate.yml
226 changes: 226 additions & 0 deletions docs/mig.md
@@ -0,0 +1,226 @@
# vGPU/MIG configuration

This page details how to configure Multi-Instance GPU (MIG) in Slurm.

## Pre-requisites

- Image built with CUDA support. This should automatically recompile Slurm
  against NVML. The builder will need to be added to the `vgpu` and `cuda`
  groups (see the inventory sketch below).
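
A minimal sketch of how the image build inventory might look, assuming the image build host is in the usual `builder` group (illustrative only):

```
[cuda:children]
builder

[vgpu:children]
cuda
```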

## Inventory

Add relevant hosts to the ``vgpu`` group, for example in `environments/$ENV/inventory/groups`:

```
[vgpu:children]
cuda
```

## Configuration

Use variables from the [stackhpc.linux.vgpu](https://github.com/stackhpc/ansible-collection-linux/tree/main/roles/vgpu) role.

For example, in `environments/<environment>/inventory/group_vars/all/vgpu`:

```
---
vgpu_definitions:
  - pci_address: "0000:17:00.0"
    mig_devices:
      "1g.10gb": 4
      "4g.40gb": 1
  - pci_address: "0000:81:00.0"
    mig_devices:
      "1g.10gb": 4
      "4g.40gb": 1
```

The appliance will use the driver installed via the ``cuda`` role.

Use ``lspci`` to determine the PCI addresses, e.g.:

```
[root@io-io-gpu-02 ~]# lspci -nn | grep -i nvidia
06:00.0 3D controller [0302]: NVIDIA Corporation GH100 [H100 SXM5 80GB] [10de:2330] (rev a1)
0c:00.0 3D controller [0302]: NVIDIA Corporation GH100 [H100 SXM5 80GB] [10de:2330] (rev a1)
46:00.0 3D controller [0302]: NVIDIA Corporation GH100 [H100 SXM5 80GB] [10de:2330] (rev a1)
4c:00.0 3D controller [0302]: NVIDIA Corporation GH100 [H100 SXM5 80GB] [10de:2330] (rev a1)
```

The supported profiles can be discovered by consulting the [NVIDIA documentation](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#supported-mig-profiles)
or interactively by running the following on one of the compute nodes with GPU resources:

```
[rocky@io-io-gpu-05 ~]$ sudo nvidia-smi -i 0 -mig 1
Enabled MIG Mode for GPU 00000000:06:00.0
All done.
[rocky@io-io-gpu-05 ~]$ sudo nvidia-smi mig -lgip
+-----------------------------------------------------------------------------+
| GPU instance profiles: |
| GPU Name ID Instances Memory P2P SM DEC ENC |
| Free/Total GiB CE JPEG OFA |
|=============================================================================|
| 0 MIG 1g.10gb 19 7/7 9.75 No 16 1 0 |
| 1 1 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 1g.10gb+me 20 1/1 9.75 No 16 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 0 MIG 1g.20gb 15 4/4 19.62 No 26 1 0 |
| 1 1 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 2g.20gb 14 3/3 19.62 No 32 2 0 |
| 2 2 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 3g.40gb 9 2/2 39.50 No 60 3 0 |
| 3 3 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 4g.40gb 5 1/1 39.50 No 64 4 0 |
| 4 4 0 |
+-----------------------------------------------------------------------------+
| 0 MIG 7g.80gb 0 1/1 79.25 No 132 7 0 |
| 8 7 1 |
+-----------------------------------------------------------------------------+
| 1 MIG 1g.10gb 19 7/7 9.75 No 16 1 0 |
| 1 1 0 |
+-----------------------------------------------------------------------------+
| 1 MIG 1g.10gb+me 20 1/1 9.75 No 16 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 1 MIG 1g.20gb 15 4/4 19.62 No 26 1 0 |
| 1 1 0 |
+-----------------------------------------------------------------------------+
| 1 MIG 2g.20gb 14 3/3 19.62 No 32 2 0 |
| 2 2 0 |
+-----------------------------------------------------------------------------+
| 1 MIG 3g.40gb 9 2/2 39.50 No 60 3 0 |
| 3 3 0 |
+-----------------------------------------------------------------------------+
| 1 MIG 4g.40gb 5 1/1 39.50 No 64 4 0 |
| 4 4 0 |
+-----------------------------------------------------------------------------+
| 1 MIG 7g.80gb 0 1/1 79.25 No 132 7 0 |
| 8 7 1 |
+-----------------------------------------------------------------------------+
| 2 MIG 1g.10gb 19 7/7 9.75 No 16 1 0 |
| 1 1 0 |
+-----------------------------------------------------------------------------+
| 2 MIG 1g.10gb+me 20 1/1 9.75 No 16 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 2 MIG 1g.20gb 15 4/4 19.62 No 26 1 0 |
| 1 1 0 |
+-----------------------------------------------------------------------------+
| 2 MIG 2g.20gb 14 3/3 19.62 No 32 2 0 |
| 2 2 0 |
+-----------------------------------------------------------------------------+
| 2 MIG 3g.40gb 9 2/2 39.50 No 60 3 0 |
| 3 3 0 |
+-----------------------------------------------------------------------------+
| 2 MIG 4g.40gb 5 1/1 39.50 No 64 4 0 |
| 4 4 0 |
+-----------------------------------------------------------------------------+
| 2 MIG 7g.80gb 0 1/1 79.25 No 132 7 0 |
| 8 7 1 |
+-----------------------------------------------------------------------------+
| 3 MIG 1g.10gb 19 7/7 9.75 No 16 1 0 |
| 1 1 0 |
+-----------------------------------------------------------------------------+
| 3 MIG 1g.10gb+me 20 1/1 9.75 No 16 1 0 |
| 1 1 1 |
+-----------------------------------------------------------------------------+
| 3 MIG 1g.20gb 15 4/4 19.62 No 26 1 0 |
| 1 1 0 |
+-----------------------------------------------------------------------------+
| 3 MIG 2g.20gb 14 3/3 19.62 No 32 2 0 |
| 2 2 0 |
+-----------------------------------------------------------------------------+
| 3 MIG 3g.40gb 9 2/2 39.50 No 60 3 0 |
| 3 3 0 |
+-----------------------------------------------------------------------------+
| 3 MIG 4g.40gb 5 1/1 39.50 No 64 4 0 |
| 4 4 0 |
+-----------------------------------------------------------------------------+
| 3 MIG 7g.80gb 0 1/1 79.25 No 132 7 0 |
| 8 7 1 |
+-----------------------------------------------------------------------------+
```

## compute_init configuration for Slurm-triggered rebuild (optional)

You only need to configure this if you are using the Slurm-triggered rebuild
feature. Use the ``vgpu`` metadata option to enable creation of MIG devices on
rebuild, as illustrated below.
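
The `compute-init` script on the rebuilt node reads this as the instance metadata key `vgpu`. How the metadata is set depends on how the node is defined; a purely illustrative example using the OpenStack CLI:

```
# Illustrative only - metadata is normally set via your OpenTofu/infrastructure configuration
openstack server set --property vgpu=true <compute-node-name>
```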

## GRES configuration

GPU resources need to be added to the OpenHPC nodegroup definitions (`openhpc_nodegroups`). To
do this, you need to determine the names of the GPU types as detected by Slurm. First
deploy Slurm with the default nodegroup definitions to get a working cluster, then make a temporary
copy of slurm.conf:

```
cp /var/spool/slurm/conf-cache/slurm.conf /tmp/
```

Then create a `/tmp/gres.conf` which enables autodetection:

```
AutoDetect=nvml
```

You can then run `sudo slurmd -f /tmp/slurm.conf -G` on a compute node where GPU resources exist. Example output is shown below:

```
[rocky@io-io-gpu-02 ~]$ sudo slurmd -f /tmp/slurm.conf -G
slurmd-io-io-gpu-02: Gres Name=gpu Type=nvidia_h100_80gb_hbm3 Count=1 Index=0 ID=7696487 File=/dev/nvidia0 Links=(null) Flags=HAS_FILE,HAS_TYPE,ENV_NVML,ENV_RSMI,ENV_ONEAPI
,ENV_OPENCL,ENV_DEFAULT
slurmd-io-io-gpu-02: Gres Name=gpu Type=nvidia_h100_80gb_hbm3 Count=1 Index=1 ID=7696487 File=/dev/nvidia1 Links=(null) Flags=HAS_FILE,HAS_TYPE,ENV_NVML,ENV_RSMI,ENV_ONEAPI
,ENV_OPENCL,ENV_DEFAULT
slurmd-io-io-gpu-02: Gres Name=gpu Type=nvidia_h100_80gb_hbm3_4g.40gb Count=1 Index=291 ID=7696487 File=/dev/nvidia-caps/nvidia-cap291 Links=(null) Flags=HAS_FILE,HAS_TYPE,
ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT
slurmd-io-io-gpu-02: Gres Name=gpu Type=nvidia_h100_80gb_hbm3_4g.40gb Count=1 Index=417 ID=7696487 File=/dev/nvidia-caps/nvidia-cap417 Links=(null) Flags=HAS_FILE,HAS_TYPE,
ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT
slurmd-io-io-gpu-02: Gres Name=gpu Type=nvidia_h100_80gb_hbm3_1g.10gb Count=1 Index=336 ID=7696487 File=/dev/nvidia-caps/nvidia-cap336 Links=(null) Flags=HAS_FILE,HAS_TYPE,
ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT
slurmd-io-io-gpu-02: Gres Name=gpu Type=nvidia_h100_80gb_hbm3_1g.10gb Count=1 Index=345 ID=7696487 File=/dev/nvidia-caps/nvidia-cap345 Links=(null) Flags=HAS_FILE,HAS_TYPE,
ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT
slurmd-io-io-gpu-02: Gres Name=gpu Type=nvidia_h100_80gb_hbm3_1g.10gb Count=1 Index=354 ID=7696487 File=/dev/nvidia-caps/nvidia-cap354 Links=(null) Flags=HAS_FILE,HAS_TYPE,
ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT
slurmd-io-io-gpu-02: Gres Name=gpu Type=nvidia_h100_80gb_hbm3_1g.10gb Count=1 Index=507 ID=7696487 File=/dev/nvidia-caps/nvidia-cap507 Links=(null) Flags=HAS_FILE,HAS_TYPE,
ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT
slurmd-io-io-gpu-02: Gres Name=gpu Type=nvidia_h100_80gb_hbm3_1g.10gb Count=1 Index=516 ID=7696487 File=/dev/nvidia-caps/nvidia-cap516 Links=(null) Flags=HAS_FILE,HAS_TYPE,
ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT
slurmd-io-io-gpu-02: Gres Name=gpu Type=nvidia_h100_80gb_hbm3_1g.10gb Count=1 Index=525 ID=7696487 File=/dev/nvidia-caps/nvidia-cap525 Links=(null) Flags=HAS_FILE,HAS_TYPE,
ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT
```

NOTE: If you have already configured a `Gres=` line in slurm.conf, you may have to adjust or remove it.

GRES resources can then be configured manually. An example is shown below
(`environments/<environment>/inventory/group_vars/all/openhpc.yml`):

```
openhpc_partitions:
  - name: cpu
  - name: gpu

openhpc_nodegroups:
  - name: cpu
  - name: gpu
    gres_autodetect: nvml
    gres:
      - conf: "gpu:nvidia_h100_80gb_hbm3:2"
      - conf: "gpu:nvidia_h100_80gb_hbm3_4g.40gb:2"
      - conf: "gpu:nvidia_h100_80gb_hbm3_1g.10gb:6"

openhpc_config:
  GresTypes:
    - gpu

```

Make sure the types (the identifier after `gpu:`) match those collected with `slurmd -G`. Substrings
of the type are also permissible; see the [slurm docs](https://slurm.schedmd.com/gres.html#MIG_Management)
for more details.
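
Once the cluster has been reconfigured, jobs can request MIG devices by GRES type. A purely illustrative example (the type string must match your `slurmd -G` output):

```
srun --partition=gpu --gres=gpu:nvidia_h100_80gb_hbm3_1g.10gb:1 nvidia-smi -L
```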