Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 62 additions & 3 deletions vgpu-manager/rhel8/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ DRIVER_VERSION=${DRIVER_VERSION:?"Missing driver version"}
DRIVER_RESET_RETRIES=10
DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15}
RUN_DIR=/run/nvidia
NVIDIA_MODULE_PARAMS=()
MODPROBE_CONFIG_DIR="/etc/modprobe.d"

# Mount the driver rootfs into the run directory with the exception of sysfs.
_mount_rootfs() {
Expand Down Expand Up @@ -52,14 +54,70 @@ _set_fw_search_path() {
echo -n "$nv_fw_search_path" > $fw_path_config_file
}

# For each kernel module configuration file mounted into the container,
# parse the file contents and extract the custom module parameters that
# are to be passed as input to 'modprobe'.
#
# Assumptions:
# - Configuration file is named nvidia.conf
# - Configuration file is mounted inside the container at /drivers
#   (a different directory may be supplied as $1).
# - Each non-blank line in the file contains at least one parameter, where
#   parameters on the same line are space delimited. It is up to the user to
#   properly format the file to ensure the correct set of parameters are
#   passed to 'modprobe'.
#
# Globals:   NVIDIA_MODULE_PARAMS (appended to; one element per non-blank line)
# Arguments: $1 - optional directory containing nvidia.conf (default: /drivers)
# Outputs:   logs the collected parameters to stdout when the file exists
_get_module_params() {
    local base_path="${1:-/drivers}"
    local conf_file="${base_path}/nvidia.conf"
    local param

    # nvidia
    if [ -f "${conf_file}" ]; then
        # '|| [ -n "$param" ]' keeps a final line that lacks a trailing newline.
        while IFS="" read -r param || [ -n "$param" ]; do
            # Blank or whitespace-only lines carry no parameter. Storing them
            # would make the array non-empty and lead to a bare
            # "options nvidia" entry being generated from it.
            if [ -z "${param//[[:space:]]/}" ]; then
                continue
            fi
            NVIDIA_MODULE_PARAMS+=("$param")
        done <"${conf_file}"
        # [*] joins the elements into one string; the '-' fallback keeps the
        # expansion safe under 'set -u' when nothing was collected.
        echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[*]-}"
    fi
}

# Run the NVIDIA vGPU-KVM .run installer non-interactively, exactly once.
#
# Globals:   DRIVER_ARCH, DRIVER_VERSION (read); DRIVER_BRANCH (written)
# Returns:   the installer's exit status, or the mktemp status if the
#            temporary directory cannot be created
_install_driver() {
    local tmp_dir
    local install_args=()

    # Declared separately from the assignment so that a mktemp failure is not
    # masked by the (always successful) exit status of 'local'.
    tmp_dir=$(mktemp -d) || return

    # Specify the --skip-module-load flag for versions of the nvidia-installer that
    # support it. From the nvidia-installer help output:
    #
    #   --skip-module-load
    #       Skip the test load of the NVIDIA kernel modules after the modules are built,
    #       and skip loading them after installation is complete.
    #
    # Without this flag, a subtle bug can occur if the nvidia-installer fails to unload
    # the NVIDIA kernel modules after the test load. The modules will remain loaded and
    # any custom NVIDIA module parameters configured as input to the driver container
    # will not be applied.
    #
    # The branch is the text before the first '.' in DRIVER_VERSION.
    DRIVER_BRANCH=${DRIVER_VERSION%%.*}
    if [ "${DRIVER_BRANCH}" -ge "550" ]; then
        install_args+=("--skip-module-load")
    fi

    # Single installer invocation: a second run without install_args would
    # perform the module test load that --skip-module-load is meant to avoid.
    # The ${arr[@]+...} form expands to nothing for an empty array, which keeps
    # it safe under 'set -u'.
    sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" \
        --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd \
        ${install_args[@]+"${install_args[@]}"}
}

# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons
_load_driver() {
# Collect the user-supplied nvidia module parameters and, when any were
# provided, write them as an 'options nvidia ...' line into the modprobe
# configuration directory.
#
# Globals:   NVIDIA_MODULE_PARAMS (filled by _get_module_params, then read)
#            MODPROBE_CONFIG_DIR (read)
# Outputs:   progress messages on stdout; writes
#            ${MODPROBE_CONFIG_DIR}/nvidia.conf when parameters exist
_create_module_params_conf() {
    local modprobe_conf="${MODPROBE_CONFIG_DIR}/nvidia.conf"

    echo "Parsing kernel module parameters..."
    _get_module_params

    if [ "${#NVIDIA_MODULE_PARAMS[@]}" -gt 0 ]; then
        echo "Configuring nvidia module parameters in ${modprobe_conf}"
        # Emit every array element separated by a single space on one line.
        # The redirect target is quoted so a path containing whitespace does
        # not trigger an "ambiguous redirect" error.
        {
            printf 'options nvidia'
            printf ' %s' "${NVIDIA_MODULE_PARAMS[@]}"
            printf '\n'
        } > "${modprobe_conf}"
    fi
}

# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons
_load_driver() {
echo "Loading NVIDIA driver kernel modules..."
set -o xtrace +o nounset
modprobe nvidia
modprobe nvidia_vgpu_vfio
set +o xtrace -o nounset

# Start vGPU daemons
/usr/bin/nvidia-vgpud
/usr/bin/nvidia-vgpu-mgr &

Expand Down Expand Up @@ -181,6 +239,7 @@ init() {
_unmount_rootfs
_create_dev_char_directory
_set_fw_search_path
_create_module_params_conf
_install_driver
_load_driver || exit 1
_mount_rootfs
Expand Down
62 changes: 60 additions & 2 deletions vgpu-manager/rhel9/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ DRIVER_VERSION=${DRIVER_VERSION:?"Missing driver version"}
DRIVER_RESET_RETRIES=10
DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15}
RUN_DIR=/run/nvidia
NVIDIA_MODULE_PARAMS=()
MODPROBE_CONFIG_DIR="/etc/modprobe.d"

# Mount the driver rootfs into the run directory with the exception of sysfs.
_mount_rootfs() {
Expand Down Expand Up @@ -64,14 +66,69 @@ _set_fw_search_path() {
echo -n "$nv_fw_search_path" > $fw_path_config_file
}

# For each kernel module configuration file mounted into the container,
# parse the file contents and extract the custom module parameters that
# are to be passed as input to 'modprobe'.
#
# Assumptions:
# - Configuration file is named nvidia.conf
# - Configuration file is mounted inside the container at /drivers
#   (a different directory may be supplied as $1).
# - Each non-blank line in the file contains at least one parameter, where
#   parameters on the same line are space delimited. It is up to the user to
#   properly format the file to ensure the correct set of parameters are
#   passed to 'modprobe'.
#
# Globals:   NVIDIA_MODULE_PARAMS (appended to; one element per non-blank line)
# Arguments: $1 - optional directory containing nvidia.conf (default: /drivers)
# Outputs:   logs the collected parameters to stdout when the file exists
_get_module_params() {
    local base_path="${1:-/drivers}"
    local conf_file="${base_path}/nvidia.conf"
    local param

    # nvidia
    if [ -f "${conf_file}" ]; then
        # '|| [ -n "$param" ]' keeps a final line that lacks a trailing newline.
        while IFS="" read -r param || [ -n "$param" ]; do
            # Blank or whitespace-only lines carry no parameter. Storing them
            # would make the array non-empty and lead to a bare
            # "options nvidia" entry being generated from it.
            if [ -z "${param//[[:space:]]/}" ]; then
                continue
            fi
            NVIDIA_MODULE_PARAMS+=("$param")
        done <"${conf_file}"
        # [*] joins the elements into one string; the '-' fallback keeps the
        # expansion safe under 'set -u' when nothing was collected.
        echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[*]-}"
    fi
}

# Run the NVIDIA vGPU-KVM .run installer non-interactively.
#
# Globals:   DRIVER_ARCH, DRIVER_VERSION (read); DRIVER_BRANCH (written)
# Returns:   the installer's exit status, or the mktemp status if the
#            temporary directory cannot be created
_install_driver() {
    local tmp_dir
    local install_args=()

    # Declared separately from the assignment so that a mktemp failure is not
    # masked by the (always successful) exit status of 'local'.
    tmp_dir=$(mktemp -d) || return

    # Specify the --skip-module-load flag for versions of the nvidia-installer that
    # support it. From the nvidia-installer help output:
    #
    #   --skip-module-load
    #       Skip the test load of the NVIDIA kernel modules after the modules are built,
    #       and skip loading them after installation is complete.
    #
    # Without this flag, a subtle bug can occur if the nvidia-installer fails to unload
    # the NVIDIA kernel modules after the test load. The modules will remain loaded and
    # any custom NVIDIA module parameters configured as input to the driver container
    # will not be applied.
    #
    # The branch is the text before the first '.' in DRIVER_VERSION.
    DRIVER_BRANCH=${DRIVER_VERSION%%.*}
    if [ "${DRIVER_BRANCH}" -ge "550" ]; then
        install_args+=("--skip-module-load")
    fi

    # The ${arr[@]+...} form expands to nothing for an empty array, which keeps
    # it safe under 'set -u'.
    sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" \
        --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd \
        ${install_args[@]+"${install_args[@]}"}
}

# Collect the user-supplied nvidia module parameters and, when any were
# provided, write them as an 'options nvidia ...' line into the modprobe
# configuration directory.
#
# This function only generates configuration; it does not run the driver
# installer (that is the job of _install_driver, which owns the temporary
# directory passed to the installer).
#
# Globals:   NVIDIA_MODULE_PARAMS (filled by _get_module_params, then read)
#            MODPROBE_CONFIG_DIR (read)
# Outputs:   progress messages on stdout; writes
#            ${MODPROBE_CONFIG_DIR}/nvidia.conf when parameters exist
_create_module_params_conf() {
    local modprobe_conf="${MODPROBE_CONFIG_DIR}/nvidia.conf"

    echo "Parsing kernel module parameters..."
    _get_module_params

    if [ "${#NVIDIA_MODULE_PARAMS[@]}" -gt 0 ]; then
        echo "Configuring nvidia module parameters in ${modprobe_conf}"
        # Emit every array element separated by a single space on one line.
        # The redirect target is quoted so a path containing whitespace does
        # not trigger an "ambiguous redirect" error.
        {
            printf 'options nvidia'
            printf ' %s' "${NVIDIA_MODULE_PARAMS[@]}"
            printf '\n'
        } > "${modprobe_conf}"
    fi
}

# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons
# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons
_load_driver() {
echo "Loading NVIDIA driver kernel modules..."
set -o xtrace +o nounset
modprobe nvidia
modprobe nvidia_vgpu_vfio
set +o xtrace -o nounset

# Start vGPU daemons
/usr/bin/nvidia-vgpud
/usr/bin/nvidia-vgpu-mgr &

Expand Down Expand Up @@ -193,6 +250,7 @@ init() {
_unmount_rootfs
_create_dev_char_directory
_set_fw_search_path
_create_module_params_conf
_install_driver
_load_driver || exit 1
_mount_rootfs
Expand Down
63 changes: 60 additions & 3 deletions vgpu-manager/ubuntu22.04/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ DRIVER_RESET_RETRIES=10
DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15}
KERNEL_VERSION=$(uname -r)
RUN_DIR=/run/nvidia
NVIDIA_MODULE_PARAMS=()
MODPROBE_CONFIG_DIR="/etc/modprobe.d"

export DEBIAN_FRONTEND=noninteractive

Expand Down Expand Up @@ -105,8 +107,24 @@ _unmount_rootfs() {

# Run the NVIDIA vGPU-KVM .run installer non-interactively, exactly once.
#
# Globals:   DRIVER_ARCH, DRIVER_VERSION (read); DRIVER_BRANCH (written)
# Returns:   the installer's exit status, or the mktemp status if the
#            temporary directory cannot be created
_install_driver() {
    local tmp_dir
    local install_args=()

    # Declared separately from the assignment so that a mktemp failure is not
    # masked by the (always successful) exit status of 'local'.
    tmp_dir=$(mktemp -d) || return

    # Specify the --skip-module-load flag for versions of the nvidia-installer that
    # support it. From the nvidia-installer help output:
    #
    #   --skip-module-load
    #       Skip the test load of the NVIDIA kernel modules after the modules are built,
    #       and skip loading them after installation is complete.
    #
    # Without this flag, a subtle bug can occur if the nvidia-installer fails to unload
    # the NVIDIA kernel modules after the test load. The modules will remain loaded and
    # any custom NVIDIA module parameters configured as input to the driver container
    # will not be applied.
    #
    # The branch is the text before the first '.' in DRIVER_VERSION.
    DRIVER_BRANCH=${DRIVER_VERSION%%.*}
    if [ "${DRIVER_BRANCH}" -ge "550" ]; then
        install_args+=("--skip-module-load")
    fi

    # Single installer invocation: a second run without install_args would
    # perform the module test load that --skip-module-load is meant to avoid.
    # The ${arr[@]+...} form expands to nothing for an empty array, which keeps
    # it safe under 'set -u'.
    sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" \
        --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd \
        ${install_args[@]+"${install_args[@]}"}
}

# Create /dev/char directory if it doesn't exist inside the container.
Expand All @@ -133,8 +151,46 @@ _set_fw_search_path() {
echo -n "$nv_fw_search_path" > $fw_path_config_file
}

# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons
# For each kernel module configuration file mounted into the container,
# parse the file contents and extract the custom module parameters that
# are to be passed as input to 'modprobe'.
#
# Assumptions:
# - Configuration file is named nvidia.conf
# - Configuration file is mounted inside the container at /drivers
#   (a different directory may be supplied as $1).
# - Each non-blank line in the file contains at least one parameter, where
#   parameters on the same line are space delimited. It is up to the user to
#   properly format the file to ensure the correct set of parameters are
#   passed to 'modprobe'.
#
# Globals:   NVIDIA_MODULE_PARAMS (appended to; one element per non-blank line)
# Arguments: $1 - optional directory containing nvidia.conf (default: /drivers)
# Outputs:   logs the collected parameters to stdout when the file exists
_get_module_params() {
    local base_path="${1:-/drivers}"
    local conf_file="${base_path}/nvidia.conf"
    local param

    # nvidia
    if [ -f "${conf_file}" ]; then
        # '|| [ -n "$param" ]' keeps a final line that lacks a trailing newline.
        while IFS="" read -r param || [ -n "$param" ]; do
            # Blank or whitespace-only lines carry no parameter. Storing them
            # would make the array non-empty and lead to a bare
            # "options nvidia" entry being generated from it.
            if [ -z "${param//[[:space:]]/}" ]; then
                continue
            fi
            NVIDIA_MODULE_PARAMS+=("$param")
        done <"${conf_file}"
        # [*] joins the elements into one string; the '-' fallback keeps the
        # expansion safe under 'set -u' when nothing was collected.
        echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[*]-}"
    fi
}

# Collect the user-supplied nvidia module parameters and, when any were
# provided, write them as an 'options nvidia ...' line into the modprobe
# configuration directory.
#
# Globals:   NVIDIA_MODULE_PARAMS (filled by _get_module_params, then read)
#            MODPROBE_CONFIG_DIR (read)
# Outputs:   progress messages on stdout; writes
#            ${MODPROBE_CONFIG_DIR}/nvidia.conf when parameters exist
_create_module_params_conf() {
    local modprobe_conf="${MODPROBE_CONFIG_DIR}/nvidia.conf"

    echo "Parsing kernel module parameters..."
    _get_module_params

    if [ "${#NVIDIA_MODULE_PARAMS[@]}" -gt 0 ]; then
        echo "Configuring nvidia module parameters in ${modprobe_conf}"
        # Emit every array element separated by a single space on one line.
        # The redirect target is quoted so a path containing whitespace does
        # not trigger an "ambiguous redirect" error.
        {
            printf 'options nvidia'
            printf ' %s' "${NVIDIA_MODULE_PARAMS[@]}"
            printf '\n'
        } > "${modprobe_conf}"
    fi
}

# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons
_load_driver() {
echo "Loading NVIDIA driver kernel modules..."
set -o xtrace +o nounset
modprobe nvidia
modprobe nvidia_vgpu_vfio
set +o xtrace -o nounset

# Start vGPU daemons
/usr/bin/nvidia-vgpud
/usr/bin/nvidia-vgpu-mgr &

Expand Down Expand Up @@ -260,6 +316,7 @@ init() {
_install_prerequisites
_create_dev_char_directory
_set_fw_search_path
_create_module_params_conf
_install_driver
_load_driver || exit 1
_mount_rootfs
Expand Down
61 changes: 59 additions & 2 deletions vgpu-manager/ubuntu24.04/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ DRIVER_RESET_RETRIES=10
DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15}
KERNEL_VERSION=$(uname -r)
RUN_DIR=/run/nvidia
NVIDIA_MODULE_PARAMS=()
MODPROBE_CONFIG_DIR="/etc/modprobe.d"

export DEBIAN_FRONTEND=noninteractive

Expand Down Expand Up @@ -129,12 +131,66 @@ _set_fw_search_path() {

# Run the NVIDIA vGPU-KVM .run installer non-interactively.
#
# Globals:   DRIVER_ARCH, DRIVER_VERSION (read); DRIVER_BRANCH (written)
# Returns:   the installer's exit status, or the mktemp status if the
#            temporary directory cannot be created
_install_driver() {
    local tmp_dir
    local install_args=()

    # Declared separately from the assignment so that a mktemp failure is not
    # masked by the (always successful) exit status of 'local'.
    tmp_dir=$(mktemp -d) || return

    # Specify the --skip-module-load flag for versions of the nvidia-installer that
    # support it. From the nvidia-installer help output:
    #
    #   --skip-module-load
    #       Skip the test load of the NVIDIA kernel modules after the modules are built,
    #       and skip loading them after installation is complete.
    #
    # Without this flag, a subtle bug can occur if the nvidia-installer fails to unload
    # the NVIDIA kernel modules after the test load. The modules will remain loaded and
    # any custom NVIDIA module parameters configured as input to the driver container
    # will not be applied.
    #
    # The branch is the text before the first '.' in DRIVER_VERSION.
    DRIVER_BRANCH=${DRIVER_VERSION%%.*}
    if [ "${DRIVER_BRANCH}" -ge "550" ]; then
        install_args+=("--skip-module-load")
    fi

    # The ${arr[@]+...} form expands to nothing for an empty array, which keeps
    # it safe under 'set -u'.
    sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" \
        --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd \
        ${install_args[@]+"${install_args[@]}"}
}

# For each kernel module configuration file mounted into the container,
# parse the file contents and extract the custom module parameters that
# are to be passed as input to 'modprobe'.
#
# Assumptions:
# - Configuration file is named nvidia.conf
# - Configuration file is mounted inside the container at /drivers
#   (a different directory may be supplied as $1).
# - Each non-blank line in the file contains at least one parameter, where
#   parameters on the same line are space delimited. It is up to the user to
#   properly format the file to ensure the correct set of parameters are
#   passed to 'modprobe'.
#
# Globals:   NVIDIA_MODULE_PARAMS (appended to; one element per non-blank line)
# Arguments: $1 - optional directory containing nvidia.conf (default: /drivers)
# Outputs:   logs the collected parameters to stdout when the file exists
_get_module_params() {
    local base_path="${1:-/drivers}"
    local conf_file="${base_path}/nvidia.conf"
    local param

    # nvidia
    if [ -f "${conf_file}" ]; then
        # '|| [ -n "$param" ]' keeps a final line that lacks a trailing newline.
        while IFS="" read -r param || [ -n "$param" ]; do
            # Blank or whitespace-only lines carry no parameter. Storing them
            # would make the array non-empty and lead to a bare
            # "options nvidia" entry being generated from it.
            if [ -z "${param//[[:space:]]/}" ]; then
                continue
            fi
            NVIDIA_MODULE_PARAMS+=("$param")
        done <"${conf_file}"
        # [*] joins the elements into one string; the '-' fallback keeps the
        # expansion safe under 'set -u' when nothing was collected.
        echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[*]-}"
    fi
}

# Collect the user-supplied nvidia module parameters and, when any were
# provided, write them as an 'options nvidia ...' line into the modprobe
# configuration directory.
#
# This function only generates configuration; it does not run the driver
# installer (that is the job of _install_driver, which owns the temporary
# directory passed to the installer).
#
# Globals:   NVIDIA_MODULE_PARAMS (filled by _get_module_params, then read)
#            MODPROBE_CONFIG_DIR (read)
# Outputs:   progress messages on stdout; writes
#            ${MODPROBE_CONFIG_DIR}/nvidia.conf when parameters exist
_create_module_params_conf() {
    local modprobe_conf="${MODPROBE_CONFIG_DIR}/nvidia.conf"

    echo "Parsing kernel module parameters..."
    _get_module_params

    if [ "${#NVIDIA_MODULE_PARAMS[@]}" -gt 0 ]; then
        echo "Configuring nvidia module parameters in ${modprobe_conf}"
        # Emit every array element separated by a single space on one line.
        # The redirect target is quoted so a path containing whitespace does
        # not trigger an "ambiguous redirect" error.
        {
            printf 'options nvidia'
            printf ' %s' "${NVIDIA_MODULE_PARAMS[@]}"
            printf '\n'
        } > "${modprobe_conf}"
    fi
}

# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons
# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons
_load_driver() {
echo "Loading NVIDIA driver kernel modules..."
set -o xtrace +o nounset
modprobe nvidia
modprobe nvidia_vgpu_vfio
set +o xtrace -o nounset

# Start vGPU daemons
/usr/bin/nvidia-vgpud
/usr/bin/nvidia-vgpu-mgr &

Expand Down Expand Up @@ -260,6 +316,7 @@ init() {
_install_prerequisites
_create_dev_char_directory
_set_fw_search_path
_create_module_params_conf
_install_driver
_load_driver || exit 1
_mount_rootfs
Expand Down