Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 3 additions & 29 deletions ansible/roles/ofed/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -1,29 +1,3 @@
# Mellanox OFED release to install (LTS).
ofed_version: '23.10-3.2.2.0'
# Download location of the release tarball, built from the version/distro/arch values below.
ofed_download_url: "https://content.mellanox.com/ofed/MLNX_OFED-{{ ofed_version }}/MLNX_OFED_LINUX-{{ ofed_version }}-{{ ofed_distro }}{{ ofed_distro_version }}-{{ ofed_arch }}.tgz"
# NB: not expected to work on other distros due to installation differences
ofed_distro: rhel
# e.g. '8.9'
ofed_distro_version: "{{ ansible_distribution_version }}"
# e.g. '8'
ofed_distro_major_version: "{{ ansible_distribution_major_version }}"
ofed_arch: "{{ ansible_architecture }}"
# Directory the tarball is unpacked into and the installer is run from.
ofed_tmp_dir: /tmp
# When false, the installer is invoked with --without-fw-update.
ofed_update_firmware: false
# may require additional packages depending on ofed_package_selection
ofed_build_packages:
  - autoconf
  - automake
  - gcc
  - gcc-gfortran
  - "kernel-devel-{{ _ofed_loaded_kernel.stdout | trim }}"
  - kernel-rpm-macros
  - libtool
  - lsof
  - patch
  - pciutils
  - perl
  - rpm-build
  - tcl
  - tk
# Extra build packages appended when ofed_distro_major_version is '8'.
ofed_build_rl8_packages:
  - gdb-headless
  - python36
# list of package selection flags for mlnxofedinstall script
ofed_package_selection:
  - hpc
  - with-nfsrdma
# DOCA release; selects the versioned directory of the online repository.
doca_version: "2.9.1"
# Package name handed to dnf by the "Install DOCA packages" task.
doca_profile: doca-ofed
# Base URL of the DOCA repository for this distribution version and architecture.
doca_repo_url: >-
  https://linux.mellanox.com/public/repo/doca/{{ doca_version }}/rhel{{ ansible_distribution_version }}/{{ ansible_architecture }}/
24 changes: 24 additions & 0 deletions ansible/roles/ofed/tasks/install-kernel-devel.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Lists the installed kernel packages; the registered output is parsed by the
# "Check current kernel is newest installed" task.
- name: Get installed kernels
  ansible.builtin.command:
    cmd: dnf list --installed kernel
  register: _ofed_dnf_kernels
  changed_when: false  # read-only query, never report a change

# Captures the release string of the currently running kernel.
- name: Determine running kernel
  ansible.builtin.command:
    cmd: uname -r  # e.g. 4.18.0-513.18.1.el8_9.x86_64
  register: _ofed_loaded_kernel
  changed_when: false  # read-only query, never report a change

# Fails when the running kernel is not the newest installed kernel package, which
# typically means a newer kernel was installed but the host has not been rebooted.
- name: Check current kernel is newest installed
  ansible.builtin.assert:
    that: _ofed_kernel_current == _ofed_dnf_kernels_newest
    fail_msg: "Kernel {{ _ofed_loaded_kernel.stdout }} is loaded but newer {{ _ofed_dnf_kernels_newest }} is installed: consider rebooting?"
  vars:
    # uname output with its last two dot-separated components stripped
    # (the regex removes the final ".xxx" component; it is applied twice).
    _ofed_kernel_current: >-
      {{ _ofed_loaded_kernel.stdout | regex_replace('\.(?:.(?!\.))+$', '') | regex_replace('\.(?:.(?!\.))+$', '') }}
    # Second column of each dnf line (header line skipped) with its last
    # dot-separated component stripped, version-sorted; the last entry is the newest.
    # dnf line format e.g. "kernel.x86_64 4.18.0-513.18.1.el8_9 @baseos "
    _ofed_dnf_kernels_newest: >-
      {{ _ofed_dnf_kernels.stdout_lines[1:] | map('split') | map(attribute=1) | map('regex_replace', '\.(?:.(?!\.))+$', '') | community.general.version_sort | last }}

# Installs the kernel-devel package whose version matches the running kernel
# (as reported by uname -r in the "Determine running kernel" task).
- name: Install matching kernel-devel package
  ansible.builtin.dnf:
    name: "kernel-devel-{{ _ofed_loaded_kernel.stdout | trim }}"
110 changes: 44 additions & 66 deletions ansible/roles/ofed/tasks/install.yml
Original file line number Diff line number Diff line change
@@ -1,75 +1,53 @@
# Record the kernel packages that dnf reports as installed.
- name: Get installed kernels
  register: _ofed_dnf_kernels
  command:
    cmd: dnf list --installed kernel
# The kernel check and the matching kernel-devel install live in their own task file.
- name: Install kernel-devel for the running kernel
  ansible.builtin.import_tasks: install-kernel-devel.yml

# Define the "doca" yum repository pointing at doca_repo_url.
# GPG checking is disabled for this repository.
- name: Install DOCA repo
  ansible.builtin.yum_repository:
    name: doca
    description: DOCA Online Repo
    file: doca
    baseurl: "{{ doca_repo_url }}"
    gpgcheck: false
    enabled: true

# Install doca-extra via dnf.
# NOTE(review): presumably this provides the doca-kernel-support tool run by the next task — confirm.
- name: Install doca-extra package
  ansible.builtin.dnf:
    name: "doca-extra"

# Run the DOCA kernel-support tool; its result is kept in _doca_kernel_build.
- name: Build DOCA kernel modules
  register: _doca_kernel_build
  ansible.builtin.shell: /opt/mellanox/doca/tools/doca-kernel-support


# Locate the repo RPM generated under /tmp/DOCA.*; its path is consumed by the
# "Install DOCA repository package" and "Cleanup DOCA build directories" tasks.
# The -name pattern is quoted so that find receives it verbatim: unquoted, the
# shell would expand it first if a matching file existed in the working directory.
# The /tmp/DOCA.* start path is left unquoted on purpose so the shell expands it.
- name: Find generated doca-kernel-repo
  ansible.builtin.shell:
    cmd: "find /tmp/DOCA.* -name 'doca-kernel-repo-*'"
  register: _doca_kernel_repo  # e.g. /tmp/DOCA.WVMchs2QWo/doca-kernel-repo-24.10.1.1.4.0-1.kver.5.14.0.427.31.1.el9.4.x86.64.x86_64.rpm
  changed_when: false  # read-only search

# Capture the release string of the kernel that is currently running.
- name: Determine running kernel
  register: _ofed_loaded_kernel
  changed_when: false
  command:
    cmd: uname -r  # e.g. 4.18.0-513.18.1.el8_9.x86_64

# Guard: stop when the running kernel differs from the newest installed kernel package.
- name: Check current kernel is newest installed
  vars:
    # dnf line format e.g. "kernel.x86_64 4.18.0-513.18.1.el8_9 @baseos "
    _ofed_dnf_kernels_newest: >-
      {{ _ofed_dnf_kernels.stdout_lines[1:] | map('split') | map(attribute=1) | map('regex_replace', '\.(?:.(?!\.))+$', '') | community.general.version_sort | last }}
    # Running kernel release with its last two dot-separated components removed.
    _ofed_kernel_current: >-
      {{ _ofed_loaded_kernel.stdout | regex_replace('\.(?:.(?!\.))+$', '') | regex_replace('\.(?:.(?!\.))+$', '') }}
  assert:
    fail_msg: "Kernel {{ _ofed_loaded_kernel.stdout }} is loaded but newer {{ _ofed_dnf_kernels_newest }} is installed: consider rebooting?"
    that: _ofed_kernel_current == _ofed_dnf_kernels_newest

# Install the epel-release package.
- name: Enable epel
  dnf:
    name: "epel-release"

# Probe for an existing installation by running ofed_info. The task only fails when
# rc is non-zero AND the message is not 'No such file or directory', so a missing
# ofed_info binary is tolerated.
- name: Check for existing OFED installation
  register: _ofed_info
  changed_when: false
  failed_when:
    - _ofed_info.rc > 0
    - "'No such file or directory' not in _ofed_info.msg"
  command:
    cmd: ofed_info
# Build the dnf metadata cache.
- name: Create dnf cache
  ansible.builtin.command:
    cmd: dnf makecache

# Install the build packages; the RL8-only extras are appended when the major
# distribution version is '8'.
- name: Install build prerequisites
  # don't want to install a load of prereqs unnecessarily
  when: "'MLNX_OFED_LINUX-' + ofed_version not in _ofed_info.stdout"
  dnf:
    name: "{{ ofed_build_packages + (ofed_build_rl8_packages if ofed_distro_major_version == '8' else []) }}"
# Install the RPM whose path was captured by the "Find generated doca-kernel-repo"
# task, with the GPG check disabled for this install.
- name: Install DOCA repository package
  ansible.builtin.dnf:
    disable_gpg_check: true
    name: "{{ _doca_kernel_repo.stdout }}"

# Fetch the OFED tarball and unpack it into ofed_tmp_dir, without privilege
# escalation; skipped when ofed_info already reports this ofed_version.
- name: Download and unpack Mellanox OFED tarball
  when: "'MLNX_OFED_LINUX-' + ofed_version not in _ofed_info.stdout"
  become: false
  ansible.builtin.unarchive:
    remote_src: true
    src: "{{ ofed_download_url }}"
    dest: "{{ ofed_tmp_dir }}"
# Install the package named by doca_profile.
- name: Install DOCA packages
  ansible.builtin.dnf:
    name: '{{ doca_profile }}'

# Runs mlnxofedinstall from the unpacked tarball directory. Firmware update is
# disabled unless ofed_update_firmware is set, and one --<flag> is added per entry
# of ofed_package_selection. Skipped when ofed_info already reports this ofed_version.
# Below from https://docs.nvidia.com/networking/display/mlnxofedv24010331/user+manual
- name: Run OFED install script
command:
cmd: >
./mlnxofedinstall
--add-kernel-support
{% if not ofed_update_firmware %}--without-fw-update{% endif %}
--force
--skip-repo
{% for pkgsel in ofed_package_selection %}
--{{ pkgsel }}
{% endfor %}
chdir: "{{ ofed_tmp_dir }}/MLNX_OFED_LINUX-{{ ofed_version }}-{{ ofed_distro }}{{ ofed_distro_version }}-{{ ofed_arch }}/"
# _ofed_install.stdout is inspected by the later initramfs and driver-reload tasks.
register: _ofed_install
when: "'MLNX_OFED_LINUX-' + ofed_version not in _ofed_info.stdout"
async: "{{ 45 * 60 }}" # wait for up to 45 minutes
poll: 15 # check every 15 seconds
# Remove the temporary DOCA build directory that holds the generated repo RPM.
# The RPM path looks like /tmp/DOCA.<random>/doca-kernel-repo-*.rpm. Splitting it on
# '/' gives ['', 'tmp', 'DOCA.<random>', '<rpm>'], so the first THREE components are
# needed to address the build directory. Taking only two resolves to '/tmp', and
# state=absent would then delete the whole of /tmp.
- name: Cleanup DOCA build directories
  ansible.builtin.file:
    state: absent
    path: "{{ (_doca_kernel_repo.stdout | split('/'))[:3] | join('/') }}"

# NOTE(review): this is a diff rendering without +/- markers. The first
# command/when/failed_when group appears to be the removed implementation and the
# ansible.builtin.command group its replacement — confirm against the pull request.
- name: Update initramfs
# Earlier form: only runs when the installer output asks for an initramfs update,
# and never fails.
command:
cmd: dracut -f
when: '"update your initramfs" in _ofed_install.stdout | default("")'
failed_when: false # always shows errors due to deleted modules for inbox RDMA drivers
# Later form: runs unconditionally, points dracut at /var/tmp via both --tmpdir and
# TMPDIR, and fails on any stderr output instead of relying on the return code.
ansible.builtin.command:
cmd: dracut -f --tmpdir /var/tmp
environment:
TMPDIR: /var/tmp
register: _doca_dracut
failed_when: _doca_dracut.stderr != '' # appears rc is always 0

# NOTE(review): this is a diff rendering without +/- markers. The command/cmd/when
# group appears to be the removed implementation and the final
# ansible.builtin.command line its replacement — confirm against the pull request.
- name: Load the new driver
# Earlier form: restart openibd only when the installer output requested it.
command:
cmd: /etc/init.d/openibd restart
when: '"To load the new driver" in _ofed_install.stdout | default("")'
# Later form: restart openibd unconditionally.
ansible.builtin.command: /etc/init.d/openibd restart
Loading