diff --git a/.common-ci.yml b/.common-ci.yml index 48ee7129b..13d377631 100644 --- a/.common-ci.yml +++ b/.common-ci.yml @@ -19,7 +19,7 @@ default: command: ["--experimental"] variables: - DRIVER_VERSIONS: 535.274.02 570.195.03 580.105.08 + DRIVER_VERSIONS: 535.274.02 570.195.03 580.105.08 590.48.01 BUILD_MULTI_ARCH_IMAGES: "true" stages: @@ -81,13 +81,19 @@ trigger-pipeline: .driver-versions: parallel: matrix: - - DRIVER_VERSION: [535.274.02, 570.195.03, 580.105.08] + - DRIVER_VERSION: [535.274.02, 570.195.03, 580.105.08, 590.48.01] # Define the driver versions for jobs that can be run in parallel .driver-versions-ubuntu24.04: parallel: matrix: - - DRIVER_VERSION: [570.195.03, 580.105.08] + - DRIVER_VERSION: [570.195.03, 580.105.08, 590.48.01] + +# Define the driver versions for jobs that can be run in parallel +.driver-versions-debian13: + parallel: + matrix: + - DRIVER_VERSION: [590.48.01] # Define the matrix of precompiled jobs that can be run in parallel for ubuntu22.04 .driver-versions-precompiled-ubuntu22.04: @@ -114,6 +120,10 @@ trigger-pipeline: variables: DIST: ubuntu24.04 +.dist-debian13: + variables: + DIST: debian13 + .dist-rhel8: variables: DIST: rhel8 @@ -199,6 +209,14 @@ trigger-pipeline: rules: - if: $CI_PIPELINE_SOURCE != "schedule" && $CI_COMMIT_TAG == null +.release-debian13: + # Perform for each DRIVER_VERSION + extends: + - .release-generic + - .driver-versions-debian13 + rules: + - if: $CI_PIPELINE_SOURCE != "schedule" && $CI_COMMIT_TAG == null + .release-rhel9: # Perform for each DRIVER_VERSION extends: @@ -245,6 +263,15 @@ trigger-pipeline: OUT_REGISTRY: "${NGC_REGISTRY}" OUT_IMAGE_NAME: "${NGC_STAGING_REGISTRY}/driver" +.release:staging-debian13: + extends: + - .release-debian13 + variables: + OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}" + OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}" + OUT_REGISTRY: "${NGC_REGISTRY}" + OUT_IMAGE_NAME: "${NGC_STAGING_REGISTRY}/driver" + .release:staging-rhel9: extends: - .release-rhel9 @@ -281,6 +308,18 @@ trigger-pipeline: VERSION: "${RELEASE_DEVEL_TAG}" OUT_VERSION: "${DEVEL_RELEASE_IMAGE_VERSION}" +.release:external-debian13: + extends: + - .release-debian13 + rules: + - if: $CI_COMMIT_TAG + variables: + #VERSION: "${CI_COMMIT_TAG}" + OUT_VERSION: "" + - if: $CI_COMMIT_BRANCH == $RELEASE_DEVEL_BRANCH + variables: + VERSION: "${RELEASE_DEVEL_TAG}" + OUT_VERSION: "${DEVEL_RELEASE_IMAGE_VERSION}" .release:external-ubuntu22.04: extends: - .release-ubuntu22.04 @@ -308,6 +347,13 @@ release:staging-ubuntu24.04: needs: - image-ubuntu24.04 +release:staging-debian13: + extends: + - .release:staging-debian13 + - .dist-debian13 + needs: + - image-debian13 + release:staging-rhel8: extends: - .release:staging diff --git a/.github/workflows/image.yaml b/.github/workflows/image.yaml index 213b35c46..5a674a1c5 100644 --- a/.github/workflows/image.yaml +++ b/.github/workflows/image.yaml @@ -31,9 +31,11 @@ jobs: - 535.274.02 - 570.195.03 - 580.105.08 + - 590.48.01 dist: - ubuntu22.04 - ubuntu24.04 + - debian13 - rhel8 - rhel9 ispr: @@ -41,6 +43,12 @@ jobs: exclude: - dist: ubuntu24.04 driver: 535.274.02 + - dist: debian13 + driver: 535.274.02 + - dist: debian13 + driver: 570.195.03 + - dist: debian13 + driver: 580.105.08 fail-fast: false steps: - uses: actions/checkout@v6 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8782e3439..3f28afd59 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -58,6 +58,15 @@ include: rules: - if: $CI_PIPELINE_SOURCE != "schedule" +# Define the image build targets +.image-build-debian13: + # Perform for each 
DRIVER_VERSION + extends: + - .driver-versions-debian13 + - .image-build-generic + rules: + - if: $CI_PIPELINE_SOURCE != "schedule" + # Define the image build targets .image-build-rhel9: # Perform for each DRIVER_VERSION @@ -83,6 +92,11 @@ image-ubuntu24.04: - .image-build-ubuntu24.04 - .dist-ubuntu24.04 +image-debian13: + extends: + - .image-build-debian13 + - .dist-debian13 + image-rhel8: extends: - .image-build diff --git a/.nvidia-ci.yml b/.nvidia-ci.yml index 01e739526..ba5761ccb 100644 --- a/.nvidia-ci.yml +++ b/.nvidia-ci.yml @@ -34,7 +34,7 @@ variables: # Define the public staging registry STAGING_REGISTRY: ghcr.io/nvidia STAGING_VERSION: "${CI_COMMIT_SHORT_SHA}" - PUBLISH_VERSIONS: 535.274.02 570.195.03 580.105.08 + PUBLISH_VERSIONS: 590.48.01 .image-pull-rules: # We delay the job start to allow the public pipeline to generate the required images. @@ -88,6 +88,16 @@ variables: when: never - !reference [.image-pull-rules, rules] +.image-pull-debian13: + # Perform for each DRIVER_VERSION + extends: + - .driver-versions-debian13 + - .image-pull-generic + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" + when: never + - !reference [.image-pull-rules, rules] + image-precompiled-ubuntu24.04: variables: DIST: signed_ubuntu24.04 @@ -136,6 +146,11 @@ image-ubuntu24.04: - .image-pull-ubuntu24.04 - .dist-ubuntu24.04 +image-debian13: + extends: + - .image-pull-debian13 + - .dist-debian13 + image-rhel8: extends: - .image-pull @@ -227,6 +242,18 @@ image-rhel9: - if: $CI_PIPELINE_SOURCE == "merge_request_event" - !reference [.pipeline-trigger-rules, rules] +.scan-debian13: + # Repeat for each DRIVER_VERSION + extends: + - .driver-versions-debian13 + - .scan-generic + rules: + - !reference [.scan-rules-common, rules] + - if: $CI_PIPELINE_SOURCE == "schedule" + when: never + - if: $CI_PIPELINE_SOURCE == "merge_request_event" + - !reference [.pipeline-trigger-rules, rules] + .scan-precompiled-ubuntu24.04: variables: DIST: signed_ubuntu24.04 @@ -283,6 +310,22 @@ scan-ubuntu24.04-arm64: needs: - image-ubuntu24.04 +scan-debian13-amd64: + extends: + - .scan-debian13 + - .dist-debian13 + - .platform-amd64 + needs: + - image-debian13 + +scan-debian13-arm64: + extends: + - .scan-debian13 + - .dist-debian13 + - .platform-arm64 + needs: + - image-debian13 + scan-precompiled-ubuntu24.04-amd64: variables: PLATFORM: linux/amd64 @@ -358,6 +401,12 @@ release:ngc-ubuntu24.04: - .dist-ubuntu24.04 - .driver-versions-ubuntu24.04 +release:ngc-debian13: + extends: + - .release:ngc + - .dist-debian13 + - .driver-versions-debian13 + release:ngc-precompiled-ubuntu24.04: variables: DIST: signed_ubuntu24.04 diff --git a/Makefile b/Makefile index ee633db42..25cbea2dd 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(OUT_DIST) OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG) ##### Public rules ##### -DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 signed_ubuntu24.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos +DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 signed_ubuntu24.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos debian13 RHCOS_VERSIONS := rhcos4.14 rhcos4.15 rhcos4.16 rhcos4.17 rhcos4.18 rhel9.6 PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS)) BASE_FROM := noble jammy focal diff --git a/debian13/Dockerfile b/debian13/Dockerfile new file mode 100644 index 000000000..faa90ad0a --- /dev/null +++ b/debian13/Dockerfile @@ -0,0 +1,99 @@ +ARG 
BASE_IMAGE=debian:13.2-slim + +FROM ${BASE_IMAGE} AS build + +ARG TARGETARCH +ARG GOLANG_VERSION + +# Arg to indicate whether the driver type is passthrough (baremetal) or vgpu +ARG DRIVER_TYPE=passthrough +ENV DRIVER_TYPE=$DRIVER_TYPE + +SHELL ["/bin/bash", "-c"] + +RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections + +# Remove cuda repository to avoid GPG errors +RUN rm -f /etc/apt/sources.list.d/cuda* + +RUN apt-get update && apt-get install -y --no-install-recommends \ + apt-utils \ + build-essential \ + ca-certificates \ + curl \ + git \ + wget && \ + rm -rf /var/lib/apt/lists/* + + + +# download appropriate binary based on the target architecture for multi-arch builds +RUN OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && \ + wget -nv -O - https://go.dev/dl/go${GOLANG_VERSION}.linux-${OS_ARCH}.tar.gz \ + | tar -C /usr/local -xz + +ENV PATH=/usr/local/go/bin:$PATH + +WORKDIR /work + +RUN if [ "$DRIVER_TYPE" = "vgpu" ]; then \ + git clone https://github.com/NVIDIA/gpu-driver-container driver && \ + cd driver/vgpu/src && \ + go build -o vgpu-util && \ + mv vgpu-util /work; fi + +FROM ${BASE_IMAGE} + +SHELL ["/bin/bash", "-c"] + +ARG BASE_URL=https://us.download.nvidia.com/tesla +ARG TARGETARCH +ENV TARGETARCH=$TARGETARCH +ARG DRIVER_VERSION +ENV DRIVER_VERSION=$DRIVER_VERSION +ENV DEBIAN_FRONTEND=noninteractive + +# Arg to indicate whether the driver type is passthrough (baremetal) or vgpu +ARG DRIVER_TYPE=passthrough +ENV DRIVER_TYPE=$DRIVER_TYPE +ARG DRIVER_BRANCH=550 +ENV DRIVER_BRANCH=$DRIVER_BRANCH +ARG VGPU_LICENSE_SERVER_TYPE=NLS +ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE +# Disable the vGPU version compatibility check by default +ARG DISABLE_VGPU_VERSION_CHECK=true +ENV DISABLE_VGPU_VERSION_CHECK=$DISABLE_VGPU_VERSION_CHECK +ENV NVIDIA_VISIBLE_DEVICES=void + +RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections + +RUN echo "TARGETARCH=$TARGETARCH" + +ADD install.sh /tmp + +RUN usermod -o -u 0 -g 0 _apt && \ + /tmp/install.sh depinstall && /tmp/install.sh setup_cuda_repo + +COPY nvidia-driver /usr/local/bin + +COPY --from=build /work/vgpu-util* /usr/local/bin + +ADD drivers drivers/ + +# Fetch the installer, fabricmanager and libnvidia-nscq automatically for passthrough/baremetal types +RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \ + cd drivers && \ + /tmp/install.sh download_installer; fi + +RUN /tmp/install.sh extra_pkgs_install + +WORKDIR /drivers + +# Install / upgrade packages here that are required to resolve CVEs +ARG CVE_UPDATES +RUN if [ -n "${CVE_UPDATES}" ]; then \ + apt-get update && apt-get upgrade -y ${CVE_UPDATES} && \ + rm -rf /var/lib/apt/lists/*; \ + fi + +ENTRYPOINT ["nvidia-driver", "init"] diff --git a/debian13/README.md b/debian13/README.md new file mode 100644 index 000000000..41e8e0f09 --- /dev/null +++ b/debian13/README.md @@ -0,0 +1,3 @@ +# Debian 13 [![build status](https://gitlab.com/nvidia/driver/badges/master/build.svg)](https://gitlab.com/nvidia/driver/commits/master) + +See https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers-(Beta) diff --git a/debian13/drivers/README.md b/debian13/drivers/README.md new file mode 100644 index 000000000..ddc27b5c9 --- /dev/null +++ b/debian13/drivers/README.md @@ -0,0 +1 @@ +# Folder for downloading vGPU drivers and dependent metadata files \ No newline at end of file diff --git a/debian13/install.sh b/debian13/install.sh new file mode 100755 index 000000000..bbca69098 --- /dev/null +++ b/debian13/install.sh @@ -0,0
+1,119 @@ +#!/usr/bin/env bash + +set -eu + +download_installer () { + DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64} && curl -fSsl -O $BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run && \ + chmod +x NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run; +} + +dep_install () { + if [ "$TARGETARCH" = "amd64" ]; then + dpkg --add-architecture i386 && \ + apt-get update && apt-get install -y --no-install-recommends \ + apt-utils \ + build-essential \ + ca-certificates \ + curl \ + kmod \ + file \ + gnupg \ + libelf-dev \ + libglvnd-dev \ + pkg-config && \ + rm -rf /var/lib/apt/lists/* + elif [ "$TARGETARCH" = "arm64" ]; then + dpkg --add-architecture arm64 && \ + apt-get update && apt-get install -y \ + build-essential \ + ca-certificates \ + curl \ + kmod \ + file \ + gnupg \ + libelf-dev \ + libglvnd-dev && \ + rm -rf /var/lib/apt/lists/* + fi +} + +setup_cuda_repo() { + # Fetch public CUDA GPG key and configure apt to only use this key when downloading CUDA packages + OS_ARCH=${TARGETARCH/amd64/x86_64} && OS_ARCH=${OS_ARCH/arm64/sbsa}; + curl -fSsL "https://developer.download.nvidia.com/compute/cuda/repos/debian13/${OS_ARCH}/cuda-keyring_1.1-1_all.deb" -o cuda-keyring_1.1-1_all.deb + dpkg -i cuda-keyring_1.1-1_all.deb +} + +fabricmanager_install() { + if [ "$DRIVER_BRANCH" -ge "580" ]; then + apt-get install -y --no-install-recommends nvidia-fabricmanager=${DRIVER_VERSION}-1 + else + apt-get install -y --no-install-recommends nvidia-fabricmanager-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 + fi +} + +nscq_install() { + if [ "$DRIVER_BRANCH" -ge "580" ]; then + apt-get install -y --no-install-recommends libnvidia-nscq=${DRIVER_VERSION}-1 + else + apt-get install -y --no-install-recommends libnvidia-nscq-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 + fi +} + +# libnvsdm packages are not available for arm64 +nvsdm_install() { + if [ "$TARGETARCH" = "amd64" ]; then + if [ "$DRIVER_BRANCH" -ge "580" ]; then + apt-get install -y --no-install-recommends libnvsdm=${DRIVER_VERSION}-1 + elif [ "$DRIVER_BRANCH" -ge "570" ]; then + apt-get install -y --no-install-recommends libnvsdm-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 + fi + fi +} + +nvlink5_pkgs_install() { + if [ "$DRIVER_BRANCH" -ge "550" ]; then + apt-get install -y --no-install-recommends nvlsm infiniband-diags + fi +} + +imex_install() { + if [ "$DRIVER_BRANCH" -ge "580" ]; then + apt-get install -y --no-install-recommends nvidia-imex=${DRIVER_VERSION}-1 + elif [ "$DRIVER_BRANCH" -ge "550" ]; then + apt-get install -y --no-install-recommends nvidia-imex-${DRIVER_BRANCH}=${DRIVER_VERSION}-1; + fi +} + +extra_pkgs_install() { + if [ "$DRIVER_TYPE" != "vgpu" ]; then + apt-get update + + fabricmanager_install + nscq_install + + echo "extra_pkgs_install $TARGETARCH" + if [ "$TARGETARCH" = "amd64" ]; then + echo "arm shouldn't be entering" + nvsdm_install + fi + + nvlink5_pkgs_install + imex_install + + rm -rf /var/lib/apt/lists/* + fi +} + +if [ "$1" = "depinstall" ]; then + dep_install +elif [ "$1" = "download_installer" ]; then + download_installer +elif [ "$1" = "extra_pkgs_install" ]; then + extra_pkgs_install +elif [ "$1" = "setup_cuda_repo" ]; then + setup_cuda_repo +else + echo "Unknown function: $1" + exit 1 +fi diff --git a/debian13/nvidia-driver b/debian13/nvidia-driver new file mode 100755 index 000000000..83d4a51d0 --- /dev/null +++ b/debian13/nvidia-driver @@ -0,0 +1,692 @@ +#! /bin/bash +# Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. 
+ +set -eu + +RUN_DIR=/run/nvidia +PID_FILE=${RUN_DIR}/${0##*/}.pid +DRIVER_VERSION=${DRIVER_VERSION:?"Missing DRIVER_VERSION env"} +KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver +NUM_VGPU_DEVICES=0 +GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}" +USE_HOST_MOFED="${USE_HOST_MOFED:-false}" +NVIDIA_MODULE_PARAMS=() +NVIDIA_UVM_MODULE_PARAMS=() +NVIDIA_MODESET_MODULE_PARAMS=() +NVIDIA_PEERMEM_MODULE_PARAMS=() +TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"} +KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto} +MODPROBE_CONFIG_DIR="/etc/modprobe.d" + +export DEBIAN_FRONTEND=noninteractive + +DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64} + +echo "DRIVER_ARCH is $DRIVER_ARCH" + +_update_package_cache() { + if [ "${PACKAGE_TAG:-}" != "builtin" ]; then + echo "Updating the package cache..." + apt-get -qq update + fi +} + +_cleanup_package_cache() { + if [ "${PACKAGE_TAG:-}" != "builtin" ]; then + echo "Cleaning up the package cache..." + rm -rf /var/lib/apt/lists/* + fi +} + +_update_ca_certificates() { + if [ ! -z "$(ls -A /usr/local/share/ca-certificates)" ]; then + update-ca-certificates + fi +} + +# Resolve the kernel version to the form major.minor.patch-revision-flavor where flavor defaults to generic. +_resolve_kernel_version() { + local version=$(apt-cache show "linux-headers-${KERNEL_VERSION}" 2> /dev/null | \ + sed -nE 's/^Version:\s+(([0-9]+\.){2}[0-9]+)[-.]([0-9]+).*/\1-\3/p' | head -1) + + echo "Resolving Linux kernel version..." + if [ -z "${version}" ]; then + echo "Could not resolve Linux kernel version" >&2 + return 1 + fi + + echo "Proceeding with Linux kernel version ${KERNEL_VERSION}" + return 0 +} + +# Install the kernel modules header/builtin/order files and generate the kernel version string. +_install_prerequisites() ( + local tmp_dir=$(mktemp -d) + + trap "rm -rf ${tmp_dir}" EXIT + cd ${tmp_dir} + + rm -rf /usr/lib/modules/${KERNEL_VERSION} + mkdir -p /usr/lib/modules/${KERNEL_VERSION}/proc + + echo "Installing Linux kernel headers..." + apt-get -qq install --no-install-recommends linux-headers-${KERNEL_VERSION} > /dev/null + # linux-headers-${KERNEL_VERSION} depends on linux-image-${KERNEL_VERSION}-amd64 + + # Merge -common into -amd64/-arm64 directory + local common_version="$(echo "${KERNEL_VERSION}" | sed -E 's/-(amd64|arm64)/-common/')" + cp -a "/usr/src/linux-headers-$common_version/"* "/usr/src/linux-headers-${KERNEL_VERSION}/" + + depmod ${KERNEL_VERSION} + + echo "Generating Linux kernel version string..." + + ls -1 /boot/vmlinuz-* | sed 's/\/boot\/vmlinuz-//g' - > version + if [ -z "$(<version)" ]; then + echo "Could not locate Linux kernel version string" >&2 + return 1 + fi + mv version /usr/lib/modules/${KERNEL_VERSION}/proc +) + +# Cleanup the prerequisites installed above. +_remove_prerequisites() { + if [ "${PACKAGE_TAG:-}" != "builtin" ]; then + apt-get -qq purge linux-headers-${KERNEL_VERSION} > /dev/null + # TODO remove module files not matching an existing driver package. + fi +} + +# This is required as the GPU driver installer currently doesn't expect headers in the x86_64 folder, but only in either the default +# or the kernel-version folder.
+_link_ofa_kernel() ( + if _gpu_direct_rdma_enabled; then + ln -s /run/mellanox/drivers/usr/src/ofa_kernel /usr/src/ + # if arch directory exists(MOFED >=5.5) then create a symlink as expected by GPU driver installer + # ls -ltr /usr/src/ofa_kernel/ + # lrwxrwxrwx 1 root root 36 Dec 8 20:10 default -> /etc/alternatives/ofa_kernel_headers + # drwxr-xr-x 4 root root 4096 Dec 8 20:14 x86_64 + # lrwxrwxrwx 1 root root 44 Dec 9 19:05 5.4.0-90-generic -> /usr/src/ofa_kernel/x86_64/5.4.0-90-generic/ + if [[ -d /run/mellanox/drivers/usr/src/ofa_kernel/$DRIVER_ARCH/$(uname -r) ]]; then + if [[ ! -e /usr/src/ofa_kernel/$(uname -r) ]]; then + ln -s /run/mellanox/drivers/usr/src/ofa_kernel/$DRIVER_ARCH/$(uname -r) /usr/src/ofa_kernel/ + fi + fi + fi +) + +_assert_nvswitch_system() { + [ -d /proc/driver/nvidia-nvswitch/devices ] || return 1 + if [ -z "$(ls -A /proc/driver/nvidia-nvswitch/devices)" ]; then + return 1 + fi + return 0 +} + +_assert_nvlink5_system() ( + for dir in /sys/class/infiniband/*/device; do + # Define the path to the VPD file + vpd_file="$dir/vpd" + + # Check if the VPD file exists + if [ -f "$vpd_file" ]; then + # Search for 'SW_MNG' in the VPD file + if grep -q "SW_MNG" "$vpd_file"; then + echo "Detected NVLink5+ system" + return 0 + fi + fi + done + return 1 +) + +_ensure_nvlink5_prerequisites() ( + until lsmod | grep mlx5_core > /dev/null 2>&1 && lsmod | grep ib_umad > /dev/null 2>&1; + do + echo "waiting for the mlx5_core and ib_umad kernel modules to be loaded" + sleep 10 + done +) + +# Check if mellanox devices are present +_mellanox_devices_present() { + devices_found=0 + for dev in /sys/bus/pci/devices/*; do + read vendor < $dev/vendor + if [ "$vendor" = "0x15b3" ]; then + echo "Mellanox device found at $(basename $dev)" + return 0 + fi + done + echo "No Mellanox devices were found..." + return 1 +} + +_gpu_direct_rdma_enabled() { + if [ "${GPU_DIRECT_RDMA_ENABLED}" = "true" ]; then + # check if mellanox cards are present + if _mellanox_devices_present; then + return 0 + fi + fi + return 1 +} + +# For each kernel module configuration file mounted into the container, +# parse the file contents and extract the custom module parameters that +# are to be passed as input to 'modprobe'. +# +# Assumptions: +# - Configuration files are named .conf (i.e. nvidia.conf, nvidia-uvm.conf). +# - Configuration files are mounted inside the container at /drivers. +# - Each line in the file contains at least one parameter, where parameters on the same line +# are space delimited. It is up to the user to properly format the file to ensure +# the correct set of parameters are passed to 'modprobe'. +_get_module_params() { + local base_path="/drivers" + + # Starting from R580, we need to enable the CDMM (Coherent Driver Memory Management) module parameter. + # This prevents the GPU memory for coherent systems (GH200, GB200 etc) from being exposed as a NUMA node + # and thereby preventing over-reporting of a Kubernetes node's memory. 
This is needed for Kubernetes use-cases + NVIDIA_MODULE_PARAMS+=("NVreg_CoherentGPUMemoryMode=driver") + + # nvidia + if [ -f "${base_path}/nvidia.conf" ]; then + while IFS="" read -r param || [ -n "$param" ]; do + NVIDIA_MODULE_PARAMS+=("$param") + done <"${base_path}/nvidia.conf" + echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[@]}" + fi + # nvidia-uvm + if [ -f "${base_path}/nvidia-uvm.conf" ]; then + while IFS="" read -r param || [ -n "$param" ]; do + NVIDIA_UVM_MODULE_PARAMS+=("$param") + done <"${base_path}/nvidia-uvm.conf" + echo "Module parameters provided for nvidia-uvm: ${NVIDIA_UVM_MODULE_PARAMS[@]}" + fi + # nvidia-modeset + if [ -f "${base_path}/nvidia-modeset.conf" ]; then + while IFS="" read -r param || [ -n "$param" ]; do + NVIDIA_MODESET_MODULE_PARAMS+=("$param") + done <"${base_path}/nvidia-modeset.conf" + echo "Module parameters provided for nvidia-modeset: ${NVIDIA_MODESET_MODULE_PARAMS[@]}" + fi + # nvidia-peermem + if [ -f "${base_path}/nvidia-peermem.conf" ]; then + while IFS="" read -r param || [ -n "$param" ]; do + NVIDIA_PEERMEM_MODULE_PARAMS+=("$param") + done <"${base_path}/nvidia-peermem.conf" + echo "Module parameters provided for nvidia-peermem: ${NVIDIA_PEERMEM_MODULE_PARAMS[@]}" + fi +} + +_create_module_params_conf() { + echo "Parsing kernel module parameters..." + _get_module_params + + if [ ${#NVIDIA_MODULE_PARAMS[@]} -gt 0 ]; then + echo "Configuring nvidia module parameters in ${MODPROBE_CONFIG_DIR}/nvidia.conf" + echo "options nvidia ${NVIDIA_MODULE_PARAMS[@]}" > ${MODPROBE_CONFIG_DIR}/nvidia.conf + fi + if [ ${#NVIDIA_UVM_MODULE_PARAMS[@]} -gt 0 ]; then + echo "Configuring nvidia-uvm module parameters in ${MODPROBE_CONFIG_DIR}/nvidia-uvm.conf" + echo "options nvidia-uvm ${NVIDIA_UVM_MODULE_PARAMS[@]}" > ${MODPROBE_CONFIG_DIR}/nvidia-uvm.conf + fi + if [ ${#NVIDIA_MODESET_MODULE_PARAMS[@]} -gt 0 ]; then + echo "Configuring nvidia-modeset module parameters in ${MODPROBE_CONFIG_DIR}/nvidia-modeset.conf" + echo "options nvidia-modeset ${NVIDIA_MODESET_MODULE_PARAMS[@]}" > ${MODPROBE_CONFIG_DIR}/nvidia-modeset.conf + fi + if [ ${#NVIDIA_PEERMEM_MODULE_PARAMS[@]} -gt 0 ]; then + echo "Configuring nvidia-peermem module parameters in ${MODPROBE_CONFIG_DIR}/nvidia-peermem.conf" + echo "options nvidia-peermem ${NVIDIA_PEERMEM_MODULE_PARAMS[@]}" > ${MODPROBE_CONFIG_DIR}/nvidia-peermem.conf + fi +} + +# Load the kernel modules and start persistenced. +_load_driver() { + local nv_fw_search_path="$RUN_DIR/driver/lib/firmware" + local set_fw_path="true" + local fw_path_config_file="/sys/module/firmware_class/parameters/path" + for param in "${NVIDIA_MODULE_PARAMS[@]}"; do + if [[ "$param" == "NVreg_EnableGpuFirmware=0" ]]; then + set_fw_path="false" + fi + done + + if [[ "$set_fw_path" == "true" ]]; then + echo "Configuring the following firmware search path in '$fw_path_config_file': $nv_fw_search_path" + if [[ ! -z $(grep '[^[:space:]]' $fw_path_config_file) ]]; then + echo "WARNING: A search path is already configured in $fw_path_config_file" + echo " Retaining the current configuration" + else + echo -n "$nv_fw_search_path" > $fw_path_config_file || echo "WARNING: Failed to configure firmware search path" + fi + fi + + echo "Loading ipmi and i2c_core kernel modules..." + modprobe -a i2c_core ipmi_msghandler ipmi_devintf + + echo "Loading NVIDIA driver kernel modules..." 
+ set -o xtrace +o nounset + modprobe nvidia + modprobe nvidia-uvm + modprobe nvidia-modeset + set +o xtrace -o nounset + + if _gpu_direct_rdma_enabled; then + echo "Loading NVIDIA Peer Memory kernel module..." + set -o xtrace +o nounset + modprobe nvidia-peermem + set +o xtrace -o nounset + fi + + echo "Starting NVIDIA persistence daemon..." + nvidia-persistenced --persistence-mode + + if [ "${DRIVER_TYPE}" = "vgpu" ]; then + echo "Copying gridd.conf..." + cp /drivers/gridd.conf /etc/nvidia/gridd.conf + if [ "${VGPU_LICENSE_SERVER_TYPE}" = "NLS" ]; then + echo "Copying ClientConfigToken..." + mkdir -p /etc/nvidia/ClientConfigToken/ + cp /drivers/ClientConfigToken/* /etc/nvidia/ClientConfigToken/ + fi + + echo "Starting nvidia-gridd.." + LD_LIBRARY_PATH=/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd + + # Start virtual topology daemon + _start_vgpu_topology_daemon + fi + + if _assert_nvlink5_system; then + _ensure_nvlink5_prerequisites || return 1 + echo "Starting NVIDIA fabric manager daemon for NVLink5+..." + + fm_config_file=/usr/share/nvidia/nvswitch/fabricmanager.cfg + fm_pid_file=/var/run/nvidia-fabricmanager/nv-fabricmanager.pid + nvlsm_config_file=/usr/share/nvidia/nvlsm/nvlsm.conf + nvlsm_pid_file=/var/run/nvidia-fabricmanager/nvlsm.pid + /usr/bin/nvidia-fabricmanager-start.sh --mode start \ + --fm-config-file $fm_config_file \ + --fm-pid-file $fm_pid_file \ + --nvlsm-config-file $nvlsm_config_file \ + --nvlsm-pid-file $nvlsm_pid_file + + # If not a NVLink5+ switch, check for the presence of NVLink4 (or below) switches + elif _assert_nvswitch_system; then + echo "Starting NVIDIA fabric manager daemon..." + nv-fabricmanager -c /usr/share/nvidia/nvswitch/fabricmanager.cfg + fi + + return 0 +} + +# Stop persistenced and unload the kernel modules if they are currently loaded. +_unload_driver() { + local rmmod_args=() + local nvidia_deps=0 + local nvidia_refs=0 + local nvidia_uvm_refs=0 + local nvidia_modeset_refs=0 + local nvidia_peermem_refs=0 + + if [ -f /var/run/nvidia-persistenced/nvidia-persistenced.pid ]; then + echo "Stopping NVIDIA persistence daemon..." + local pid=$(< /var/run/nvidia-persistenced/nvidia-persistenced.pid) + + kill -SIGTERM "${pid}" + for i in $(seq 1 50); do + kill -0 "${pid}" 2> /dev/null || break + sleep 0.1 + done + if [ $i -eq 50 ]; then + echo "Could not stop NVIDIA persistence daemon" >&2 + return 1 + fi + fi + + if [ -f /var/run/nvidia-gridd/nvidia-gridd.pid ]; then + echo "Stopping NVIDIA grid daemon..." + local pid=$(< /var/run/nvidia-gridd/nvidia-gridd.pid) + + kill -SIGTERM "${pid}" + for i in $(seq 1 50); do + kill -0 "${pid}" 2> /dev/null || break + sleep 0.1 + done + if [ $i -eq 50 ]; then + echo "Could not stop NVIDIA Grid daemon" >&2 + return 1 + fi + fi + + if [ -f /var/run/nvidia-fabricmanager/nv-fabricmanager.pid ]; then + echo "Stopping NVIDIA fabric manager daemon..." + local pid=$(< /var/run/nvidia-fabricmanager/nv-fabricmanager.pid) + + kill -SIGTERM "${pid}" + for i in $(seq 1 50); do + kill -0 "${pid}" 2> /dev/null || break + sleep 0.1 + done + if [ $i -eq 50 ]; then + echo "Could not stop NVIDIA fabric manager daemon" >&2 + return 1 + fi + fi + + if [ -f /var/run/nvidia-fabricmanager/nvlsm.pid ]; then + echo "Stopping NVLink Subnet Manager daemon..." 
+ local pid=$(< /var/run/nvidia-fabricmanager/nvlsm.pid) + + kill -SIGTERM "${pid}" + for i in $(seq 1 50); do + kill -0 "${pid}" 2> /dev/null || break + sleep 0.1 + done + if [ $i -eq 50 ]; then + echo "Could not stop NVLink Subnet Manager daemon" >&2 + return 1 + fi + fi + + echo "Unloading NVIDIA driver kernel modules..." + if [ -f /sys/module/nvidia_modeset/refcnt ]; then + nvidia_modeset_refs=$(< /sys/module/nvidia_modeset/refcnt) + rmmod_args+=("nvidia-modeset") + ((++nvidia_deps)) + fi + if [ -f /sys/module/nvidia_uvm/refcnt ]; then + nvidia_uvm_refs=$(< /sys/module/nvidia_uvm/refcnt) + rmmod_args+=("nvidia-uvm") + ((++nvidia_deps)) + fi + if [ -f /sys/module/nvidia_peermem/refcnt ]; then + nvidia_peermem_refs=$(< /sys/module/nvidia_peermem/refcnt) + rmmod_args+=("nvidia-peermem") + ((++nvidia_deps)) + fi + if [ -f /sys/module/nvidia/refcnt ]; then + nvidia_refs=$(< /sys/module/nvidia/refcnt) + rmmod_args+=("nvidia") + fi + if [ ${nvidia_refs} -gt ${nvidia_deps} ] || [ ${nvidia_uvm_refs} -gt 0 ] || [ ${nvidia_modeset_refs} -gt 0 ] || [ ${nvidia_peermem_refs} -gt 0 ]; then + # run lsmod to debug module usage + lsmod | grep nvidia + echo "Could not unload NVIDIA driver kernel modules, driver is in use" >&2 + return 1 + fi + + if [ ${#rmmod_args[@]} -gt 0 ]; then + rmmod ${rmmod_args[@]} + fi + return 0 +} + +# Link and install the kernel modules from a precompiled package using the nvidia-installer. +_install_driver() { + local install_args=() + + if [ "${ACCEPT_LICENSE}" = "yes" ]; then + install_args+=("--accept-license") + fi + + if [ -n "${MAX_THREADS}" ]; then + install_args+=("--concurrency-level=${MAX_THREADS}") + fi + + if [[ "${KERNEL_MODULE_TYPE}" == "open" || "${KERNEL_MODULE_TYPE}" == "proprietary" ]]; then + [[ "${KERNEL_MODULE_TYPE}" == "open" ]] && kernel_type=kernel-open || kernel_type=kernel + echo "Proceeding with user-specified kernel module type ${KERNEL_MODULE_TYPE}" + install_args+=("-m=${kernel_type}") + fi + + # Specify the --skip-module-load flag for versions of the nvidia-installer that + # support it. From the nvidia-installer help output: + # + # --skip-module-load + # Skip the test load of the NVIDIA kernel modules after the modules are built, + # and skip loading them after installation is complete. + # + # Without this flag, a subtle bug can occur if the nvidia-installer fails to unload + # the NVIDIA kernel modules after the test load. The modules will remain loaded and + # any custom NVIDIA module parameters configured as input to the driver container + # will not be applied. + # + if [ "${DRIVER_BRANCH}" -ge "550" ]; then + install_args+=("--skip-module-load") + fi + + # Install the NVIDIA driver in one step + sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run --silent \ + --ui=none \ + --no-drm \ + --no-nouveau-check \ + --no-nvidia-modprobe \ + --no-rpms \ + --no-backup \ + --no-check-for-alternate-installs \ + --no-libglx-indirect \ + --no-install-libglvnd \ + --x-prefix=/tmp/null \ + --x-module-path=/tmp/null \ + --x-library-path=/tmp/null \ + --x-sysconfig-path=/tmp/null \ + ${install_args[@]+"${install_args[@]}"} +} + +# Mount the driver rootfs into the run directory with the exception of sysfs. +_mount_rootfs() { + echo "Mounting NVIDIA driver rootfs..." + mount --make-runbindable /sys + mount --make-private /sys + mkdir -p ${RUN_DIR}/driver + mount --rbind / ${RUN_DIR}/driver +} + +# Unmount the driver rootfs from the run directory. +_unmount_rootfs() { + echo "Unmounting NVIDIA driver rootfs..." 
+ if findmnt -r -o TARGET | grep "${RUN_DIR}/driver" > /dev/null; then + umount -l -R ${RUN_DIR}/driver + fi +} + +# Write a kernel postinst.d script to automatically precompile packages on kernel update (similar to DKMS). +_write_kernel_update_hook() { + if [ ! -d ${KERNEL_UPDATE_HOOK%/*} ]; then + return + fi + + echo "Writing kernel update hook..." + cat > ${KERNEL_UPDATE_HOOK} <<'EOF' +#!/bin/bash + +set -eu +trap 'echo "ERROR: Failed to update the NVIDIA driver" >&2; exit 0' ERR + +NVIDIA_DRIVER_PID=$(< /run/nvidia/nvidia-driver.pid) + +export "$(grep -z DRIVER_VERSION /proc/${NVIDIA_DRIVER_PID}/environ)" +nsenter -t "${NVIDIA_DRIVER_PID}" -m -- nvidia-driver update --kernel "$1" +EOF + chmod +x ${KERNEL_UPDATE_HOOK} +} + +_shutdown() { + if _unload_driver; then + _unmount_rootfs + rm -f ${PID_FILE} ${KERNEL_UPDATE_HOOK} + return 0 + fi + return 1 +} + +_find_vgpu_driver_version() { + local count="" + local version="" + + if [ "${DISABLE_VGPU_VERSION_CHECK}" = "true" ]; then + echo "vgpu version compatibility check is disabled" + return 0 + fi + # check if vgpu devices are present + count=$(vgpu-util count) + if [ $? -ne 0 ]; then + echo "cannot find vgpu devices on host, please check /var/log/vgpu-util.log for more details..." + return 0 + fi + NUM_VGPU_DEVICES=$(echo "$count" | awk -F= '{print $2}') + if [ $NUM_VGPU_DEVICES -eq 0 ]; then + # no vgpu devices found, treat as passthrough + return 0 + fi + echo "found $NUM_VGPU_DEVICES vgpu devices on host" + + # find compatible guest driver using driver catalog + version=$(vgpu-util match -i /drivers -c /drivers/vgpuDriverCatalog.yaml) + if [ $? -ne 0 ]; then + echo "cannot find match for compatible vgpu driver from available list, please check /var/log/vgpu-util.log for more details..." + return 1 + fi + DRIVER_VERSION=$(echo "$version" | awk -F= '{print $2}') + echo "vgpu driver version selected: ${DRIVER_VERSION}" + return 0 +} + +_start_vgpu_topology_daemon() { + type nvidia-topologyd > /dev/null 2>&1 || return 0 + echo "Starting nvidia-topologyd..." + nvidia-topologyd +} + +init() { + if [ "${DRIVER_TYPE}" = "vgpu" ]; then + _find_vgpu_driver_version || exit 1 + fi + + echo -e "\n========== NVIDIA Software Installer ==========\n" + echo -e "Starting installation of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION}\n" + + exec 3> ${PID_FILE} + if ! flock -n 3; then + echo "An instance of the NVIDIA driver is already running, aborting" + exit 1 + fi + echo $$ >&3 + + trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM + trap "_shutdown" EXIT + + _unload_driver || exit 1 + _unmount_rootfs + + _update_ca_certificates + _update_package_cache + _resolve_kernel_version || exit 1 + _install_prerequisites + _link_ofa_kernel + #_remove_prerequisites + #_cleanup_package_cache + + _create_module_params_conf + _install_driver + _load_driver || exit 1 + _mount_rootfs + _write_kernel_update_hook + + echo "Done, now waiting for signal" + sleep infinity & + trap "echo 'Caught signal'; _shutdown && { kill $!; exit 0; }" HUP INT QUIT PIPE TERM + trap - EXIT + while true; do wait $!
|| continue; done + exit 0 +} + +# Wait for MOFED drivers to be loaded and load nvidia-peermem whenever it gets unloaded during MOFED driver updates +reload_nvidia_peermem() { + if [ "$USE_HOST_MOFED" = "true" ]; then + until lsmod | grep mlx5_core > /dev/null 2>&1 && [ -f /run/nvidia/validations/.driver-ctr-ready ]; + do + echo "waiting for mellanox ofed and nvidia drivers to be installed" + sleep 10 + done + else + # use driver readiness flag created by MOFED container + until [ -f /run/mellanox/drivers/.driver-ready ] && [ -f /run/nvidia/validations/.driver-ctr-ready ]; + do + echo "waiting for mellanox ofed and nvidia drivers to be installed" + sleep 10 + done + fi + # get any parameters provided for nvidia-peermem + _get_module_params && set +o nounset + if chroot /run/nvidia/driver modprobe nvidia-peermem "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}"; then + if [ -f /sys/module/nvidia_peermem/refcnt ]; then + echo "successfully loaded nvidia-peermem module, now waiting for signal" + sleep inf + trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM + fi + fi + echo "failed to load nvidia-peermem module" + exit 1 +} + +# Probe used by gpu-operator for liveness/startup checks to verify that the nvidia-peermem module is loaded once MOFED drivers are ready +probe_nvidia_peermem() { + if lsmod | grep mlx5_core > /dev/null 2>&1; then + if [ ! -f /sys/module/nvidia_peermem/refcnt ]; then + echo "nvidia-peermem module is not loaded" + return 1 + fi + else + echo "MOFED drivers are not ready, skipping probe to avoid container restarts..." + fi + return 0 +} + +usage() { + cat >&2 <