Skip to content

Commit ee2286f

Browse files
Shiva Kumarshivakunv
authored and committed
gpu-manager: enable kernel module configuration via KernelModuleConfig
Signed-off-by: Shiva Kumar (SW-CLOUD) <[email protected]>
1 parent e4f05d3 commit ee2286f

File tree

4 files changed

+241
-10
lines changed

4 files changed

+241
-10
lines changed

vgpu-manager/rhel8/nvidia-driver

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ DRIVER_VERSION=${DRIVER_VERSION:?"Missing driver version"}
77
DRIVER_RESET_RETRIES=10
88
DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15}
99
RUN_DIR=/run/nvidia
10+
NVIDIA_MODULE_PARAMS=()
11+
MODPROBE_CONFIG_DIR="/etc/modprobe.d"
1012

1113
# Mount the driver rootfs into the run directory with the exception of sysfs.
1214
_mount_rootfs() {
@@ -52,14 +54,70 @@ _set_fw_search_path() {
5254
echo -n "$nv_fw_search_path" > $fw_path_config_file
5355
}
5456

57+
# For each kernel module configuration file mounted into the container,
# parse the file contents and extract the custom module parameters that
# are to be passed as input to 'modprobe'.
#
# Globals:
#   NVIDIA_MODULE_PARAMS (appended to) - one array element per accepted line.
# Arguments:
#   $1 - optional directory holding the configuration file (default: /drivers).
# Outputs:
#   When the configuration file exists, logs the collected parameters to stdout.
#
# Assumptions:
# - Configuration file is named nvidia.conf
# - Configuration file is mounted inside the container at /drivers.
# - Parameters on the same line are space delimited; each accepted line is kept
#   verbatim as a single element. It is up to the user to properly format the
#   file to ensure the correct set of parameters are passed to 'modprobe'.
# - Blank lines, whitespace-only lines and lines whose first non-blank character
#   is '#' carry no parameter and are skipped.
_get_module_params() {
  local base_path="${1:-/drivers}"
  local conf_file="${base_path}/nvidia.conf"
  # Matches empty / whitespace-only lines and '#' comment lines.
  local skip_re='^[[:space:]]*(#.*)?$'
  local param

  # nvidia
  if [ -f "${conf_file}" ]; then
    # '|| [ -n "$param" ]' keeps a last line that has no trailing newline.
    while IFS="" read -r param || [ -n "$param" ]; do
      # Tolerate CRLF line endings.
      param="${param%$'\r'}"
      if [[ "${param}" =~ ${skip_re} ]]; then
        continue
      fi
      NVIDIA_MODULE_PARAMS+=("$param")
    done <"${conf_file}"

    # Join with a single space for logging, independent of the caller's IFS.
    local IFS=' '
    echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[*]}"
  fi
}
77+
5578
# Run the NVIDIA vGPU-KVM '.run' installer non-interactively.
#
# Globals:
#   DRIVER_ARCH, DRIVER_VERSION (read) - select the installer file name.
#   DRIVER_BRANCH (written)            - leading component of DRIVER_VERSION.
# Returns:
#   The exit status of the installer (or of mktemp if no scratch directory
#   could be created).
_install_driver() {
  local tmp_dir
  local install_args=()
  local rc=0

  # Assigned separately from 'local' so that a mktemp failure is not masked.
  tmp_dir=$(mktemp -d) || return

  # Specify the --skip-module-load flag for versions of the nvidia-installer that
  # support it. From the nvidia-installer help output:
  #
  # --skip-module-load
  #     Skip the test load of the NVIDIA kernel modules after the modules are built,
  #     and skip loading them after installation is complete.
  #
  # Without this flag, a subtle bug can occur if the nvidia-installer fails to unload
  # the NVIDIA kernel modules after the test load. The modules will remain loaded and
  # any custom NVIDIA module parameters configured as input to the driver container
  # will not be applied.
  #
  DRIVER_BRANCH="${DRIVER_VERSION%%.*}"
  # Only compare numerically when the branch really is a number; otherwise
  # '[ ... -ge ... ]' would emit an "integer expression expected" error.
  if [[ "${DRIVER_BRANCH}" =~ ^[0-9]+$ ]] && [ "${DRIVER_BRANCH}" -ge 550 ]; then
    install_args+=("--skip-module-load")
  fi

  # The '${arr[@]+...}' form expands to nothing for an empty array, which keeps
  # older bash versions from failing under 'set -o nounset'.
  sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" \
    --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd \
    ${install_args[@]+"${install_args[@]}"} || rc=$?

  # Remove the installer scratch directory, then report the installer's status.
  rm -rf -- "${tmp_dir:?}"
  return "${rc}"
}
60101

61-
# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons
62-
_load_driver() {
102+
# Collect the custom nvidia module parameters and, when any were provided,
# write them as a single 'options nvidia ...' line to
# ${MODPROBE_CONFIG_DIR}/nvidia.conf.
#
# Globals:
#   NVIDIA_MODULE_PARAMS (read)  - filled in by _get_module_params.
#   MODPROBE_CONFIG_DIR  (read)  - directory receiving nvidia.conf.
# Outputs:
#   Progress messages on stdout. No file is written when no parameters exist.
_create_module_params_conf() {
  local conf_file="${MODPROBE_CONFIG_DIR}/nvidia.conf"

  echo "Parsing kernel module parameters..."
  _get_module_params

  if [ "${#NVIDIA_MODULE_PARAMS[@]}" -gt 0 ]; then
    # Join the array elements with a single space regardless of the caller's IFS.
    local IFS=' '

    echo "Configuring nvidia module parameters in ${conf_file}"
    # Make sure the target directory exists before redirecting into it.
    mkdir -p -- "${MODPROBE_CONFIG_DIR}"
    printf 'options nvidia %s\n' "${NVIDIA_MODULE_PARAMS[*]}" >"${conf_file}"
  fi
}
111+
112+
# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons
113+
_load_driver() {
114+
echo "Loading NVIDIA driver kernel modules..."
115+
set -o xtrace +o nounset
116+
modprobe nvidia
117+
modprobe nvidia_vgpu_vfio
118+
set +o xtrace -o nounset
119+
120+
# Start vGPU daemons
63121
/usr/bin/nvidia-vgpud
64122
/usr/bin/nvidia-vgpu-mgr &
65123

@@ -181,6 +239,7 @@ init() {
181239
_unmount_rootfs
182240
_create_dev_char_directory
183241
_set_fw_search_path
242+
_create_module_params_conf
184243
_install_driver
185244
_load_driver || exit 1
186245
_mount_rootfs

vgpu-manager/rhel9/nvidia-driver

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ DRIVER_VERSION=${DRIVER_VERSION:?"Missing driver version"}
1919
DRIVER_RESET_RETRIES=10
2020
DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15}
2121
RUN_DIR=/run/nvidia
22+
NVIDIA_MODULE_PARAMS=()
23+
MODPROBE_CONFIG_DIR="/etc/modprobe.d"
2224

2325
# Mount the driver rootfs into the run directory with the exception of sysfs.
2426
_mount_rootfs() {
@@ -64,14 +66,69 @@ _set_fw_search_path() {
6466
echo -n "$nv_fw_search_path" > $fw_path_config_file
6567
}
6668

69+
# For each kernel module configuration file mounted into the container,
# parse the file contents and extract the custom module parameters that
# are to be passed as input to 'modprobe'.
#
# Globals:
#   NVIDIA_MODULE_PARAMS (appended to) - one array element per accepted line.
# Arguments:
#   $1 - optional directory holding the configuration file (default: /drivers).
# Outputs:
#   When the configuration file exists, logs the collected parameters to stdout.
#
# Assumptions:
# - Configuration file is named nvidia.conf
# - Configuration file is mounted inside the container at /drivers.
# - Parameters on the same line are space delimited; each accepted line is kept
#   verbatim as a single element. It is up to the user to properly format the
#   file to ensure the correct set of parameters are passed to 'modprobe'.
# - Blank lines, whitespace-only lines and lines whose first non-blank character
#   is '#' carry no parameter and are skipped.
_get_module_params() {
  local base_path="${1:-/drivers}"
  local conf_file="${base_path}/nvidia.conf"
  # Matches empty / whitespace-only lines and '#' comment lines.
  local skip_re='^[[:space:]]*(#.*)?$'
  local param

  # nvidia
  if [ -f "${conf_file}" ]; then
    # '|| [ -n "$param" ]' keeps a last line that has no trailing newline.
    while IFS="" read -r param || [ -n "$param" ]; do
      # Tolerate CRLF line endings.
      param="${param%$'\r'}"
      if [[ "${param}" =~ ${skip_re} ]]; then
        continue
      fi
      NVIDIA_MODULE_PARAMS+=("$param")
    done <"${conf_file}"

    # Join with a single space for logging, independent of the caller's IFS.
    local IFS=' '
    echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[*]}"
  fi
}
89+
6790
# Run the NVIDIA vGPU-KVM '.run' installer non-interactively.
#
# Globals:
#   DRIVER_ARCH, DRIVER_VERSION (read) - select the installer file name.
#   DRIVER_BRANCH (written)            - leading component of DRIVER_VERSION.
# Returns:
#   The exit status of the installer (or of mktemp if no scratch directory
#   could be created).
_install_driver() {
  local tmp_dir
  local install_args=()
  local rc=0

  # Assigned separately from 'local' so that a mktemp failure is not masked.
  tmp_dir=$(mktemp -d) || return

  # Specify the --skip-module-load flag for versions of the nvidia-installer that
  # support it. From the nvidia-installer help output:
  #
  # --skip-module-load
  #     Skip the test load of the NVIDIA kernel modules after the modules are built,
  #     and skip loading them after installation is complete.
  #
  # Without this flag, a subtle bug can occur if the nvidia-installer fails to unload
  # the NVIDIA kernel modules after the test load. The modules will remain loaded and
  # any custom NVIDIA module parameters configured as input to the driver container
  # will not be applied.
  #
  DRIVER_BRANCH="${DRIVER_VERSION%%.*}"
  # Only compare numerically when the branch really is a number; otherwise
  # '[ ... -ge ... ]' would emit an "integer expression expected" error.
  if [[ "${DRIVER_BRANCH}" =~ ^[0-9]+$ ]] && [ "${DRIVER_BRANCH}" -ge 550 ]; then
    install_args+=("--skip-module-load")
  fi

  # The '${arr[@]+...}' form expands to nothing for an empty array, which keeps
  # older bash versions from failing under 'set -o nounset'.
  sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" \
    --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd \
    ${install_args[@]+"${install_args[@]}"} || rc=$?

  # Remove the installer scratch directory, then report the installer's status.
  rm -rf -- "${tmp_dir:?}"
  return "${rc}"
}
112+
113+
# Collect the custom nvidia module parameters and, when any were provided,
# write them as a single 'options nvidia ...' line to
# ${MODPROBE_CONFIG_DIR}/nvidia.conf.
#
# Globals:
#   NVIDIA_MODULE_PARAMS (read)  - filled in by _get_module_params.
#   MODPROBE_CONFIG_DIR  (read)  - directory receiving nvidia.conf.
# Outputs:
#   Progress messages on stdout. No file is written when no parameters exist.
_create_module_params_conf() {
  local conf_file="${MODPROBE_CONFIG_DIR}/nvidia.conf"

  echo "Parsing kernel module parameters..."
  _get_module_params

  if [ "${#NVIDIA_MODULE_PARAMS[@]}" -gt 0 ]; then
    # Join the array elements with a single space regardless of the caller's IFS.
    local IFS=' '

    echo "Configuring nvidia module parameters in ${conf_file}"
    # Make sure the target directory exists before redirecting into it.
    mkdir -p -- "${MODPROBE_CONFIG_DIR}"
    printf 'options nvidia %s\n' "${NVIDIA_MODULE_PARAMS[*]}" >"${conf_file}"
  fi
}
72122

73-
# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons
123+
# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons
74124
_load_driver() {
125+
echo "Loading NVIDIA driver kernel modules..."
126+
set -o xtrace +o nounset
127+
modprobe nvidia
128+
modprobe nvidia_vgpu_vfio
129+
set +o xtrace -o nounset
130+
131+
# Start vGPU daemons
75132
/usr/bin/nvidia-vgpud
76133
/usr/bin/nvidia-vgpu-mgr &
77134

@@ -193,6 +250,7 @@ init() {
193250
_unmount_rootfs
194251
_create_dev_char_directory
195252
_set_fw_search_path
253+
_create_module_params_conf
196254
_install_driver
197255
_load_driver || exit 1
198256
_mount_rootfs

vgpu-manager/ubuntu22.04/nvidia-driver

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ DRIVER_RESET_RETRIES=10
88
DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15}
99
KERNEL_VERSION=$(uname -r)
1010
RUN_DIR=/run/nvidia
11+
NVIDIA_MODULE_PARAMS=()
12+
MODPROBE_CONFIG_DIR="/etc/modprobe.d"
1113

1214
export DEBIAN_FRONTEND=noninteractive
1315

@@ -105,8 +107,24 @@ _unmount_rootfs() {
105107

106108
# Run the NVIDIA vGPU-KVM '.run' installer non-interactively.
#
# Globals:
#   DRIVER_ARCH, DRIVER_VERSION (read) - select the installer file name.
#   DRIVER_BRANCH (written)            - leading component of DRIVER_VERSION.
# Returns:
#   The exit status of the installer (or of mktemp if no scratch directory
#   could be created).
_install_driver() {
  local tmp_dir
  local install_args=()
  local rc=0

  # Assigned separately from 'local' so that a mktemp failure is not masked.
  tmp_dir=$(mktemp -d) || return

  # Specify the --skip-module-load flag for versions of the nvidia-installer that
  # support it. From the nvidia-installer help output:
  #
  # --skip-module-load
  #     Skip the test load of the NVIDIA kernel modules after the modules are built,
  #     and skip loading them after installation is complete.
  #
  # Without this flag, a subtle bug can occur if the nvidia-installer fails to unload
  # the NVIDIA kernel modules after the test load. The modules will remain loaded and
  # any custom NVIDIA module parameters configured as input to the driver container
  # will not be applied.
  #
  DRIVER_BRANCH="${DRIVER_VERSION%%.*}"
  # Only compare numerically when the branch really is a number; otherwise
  # '[ ... -ge ... ]' would emit an "integer expression expected" error.
  if [[ "${DRIVER_BRANCH}" =~ ^[0-9]+$ ]] && [ "${DRIVER_BRANCH}" -ge 550 ]; then
    install_args+=("--skip-module-load")
  fi

  # The '${arr[@]+...}' form expands to nothing for an empty array, which keeps
  # older bash versions from failing under 'set -o nounset'.
  sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" \
    --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd \
    ${install_args[@]+"${install_args[@]}"} || rc=$?

  # Remove the installer scratch directory, then report the installer's status.
  rm -rf -- "${tmp_dir:?}"
  return "${rc}"
}
111129

112130
# Create /dev/char directory if it doesn't exist inside the container.
@@ -133,8 +151,46 @@ _set_fw_search_path() {
133151
echo -n "$nv_fw_search_path" > $fw_path_config_file
134152
}
135153

136-
# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons
154+
# For each kernel module configuration file mounted into the container,
# parse the file contents and extract the custom module parameters that
# are to be passed as input to 'modprobe'.
#
# Globals:
#   NVIDIA_MODULE_PARAMS (appended to) - one array element per accepted line.
# Arguments:
#   $1 - optional directory holding the configuration file (default: /drivers).
# Outputs:
#   When the configuration file exists, logs the collected parameters to stdout.
#
# Assumptions:
# - Configuration file is named nvidia.conf
# - Configuration file is mounted inside the container at /drivers.
# - Parameters on the same line are space delimited; each accepted line is kept
#   verbatim as a single element. It is up to the user to properly format the
#   file to ensure the correct set of parameters are passed to 'modprobe'.
# - Blank lines, whitespace-only lines and lines whose first non-blank character
#   is '#' carry no parameter and are skipped.
_get_module_params() {
  local base_path="${1:-/drivers}"
  local conf_file="${base_path}/nvidia.conf"
  # Matches empty / whitespace-only lines and '#' comment lines.
  local skip_re='^[[:space:]]*(#.*)?$'
  local param

  # nvidia
  if [ -f "${conf_file}" ]; then
    # '|| [ -n "$param" ]' keeps a last line that has no trailing newline.
    while IFS="" read -r param || [ -n "$param" ]; do
      # Tolerate CRLF line endings.
      param="${param%$'\r'}"
      if [[ "${param}" =~ ${skip_re} ]]; then
        continue
      fi
      NVIDIA_MODULE_PARAMS+=("$param")
    done <"${conf_file}"

    # Join with a single space for logging, independent of the caller's IFS.
    local IFS=' '
    echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[*]}"
  fi
}
174+
175+
# Collect the custom nvidia module parameters and, when any were provided,
# write them as a single 'options nvidia ...' line to
# ${MODPROBE_CONFIG_DIR}/nvidia.conf.
#
# Globals:
#   NVIDIA_MODULE_PARAMS (read)  - filled in by _get_module_params.
#   MODPROBE_CONFIG_DIR  (read)  - directory receiving nvidia.conf.
# Outputs:
#   Progress messages on stdout. No file is written when no parameters exist.
_create_module_params_conf() {
  local conf_file="${MODPROBE_CONFIG_DIR}/nvidia.conf"

  echo "Parsing kernel module parameters..."
  _get_module_params

  if [ "${#NVIDIA_MODULE_PARAMS[@]}" -gt 0 ]; then
    # Join the array elements with a single space regardless of the caller's IFS.
    local IFS=' '

    echo "Configuring nvidia module parameters in ${conf_file}"
    # Make sure the target directory exists before redirecting into it.
    mkdir -p -- "${MODPROBE_CONFIG_DIR}"
    printf 'options nvidia %s\n' "${NVIDIA_MODULE_PARAMS[*]}" >"${conf_file}"
  fi
}
184+
185+
# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons
137186
_load_driver() {
187+
echo "Loading NVIDIA driver kernel modules..."
188+
set -o xtrace +o nounset
189+
modprobe nvidia
190+
modprobe nvidia_vgpu_vfio
191+
set +o xtrace -o nounset
192+
193+
# Start vGPU daemons
138194
/usr/bin/nvidia-vgpud
139195
/usr/bin/nvidia-vgpu-mgr &
140196

@@ -260,6 +316,7 @@ init() {
260316
_install_prerequisites
261317
_create_dev_char_directory
262318
_set_fw_search_path
319+
_create_module_params_conf
263320
_install_driver
264321
_load_driver || exit 1
265322
_mount_rootfs

vgpu-manager/ubuntu24.04/nvidia-driver

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ DRIVER_RESET_RETRIES=10
88
DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15}
99
KERNEL_VERSION=$(uname -r)
1010
RUN_DIR=/run/nvidia
11+
NVIDIA_MODULE_PARAMS=()
12+
MODPROBE_CONFIG_DIR="/etc/modprobe.d"
1113

1214
export DEBIAN_FRONTEND=noninteractive
1315

@@ -129,12 +131,66 @@ _set_fw_search_path() {
129131

130132
# Run the NVIDIA vGPU-KVM '.run' installer non-interactively.
#
# Globals:
#   DRIVER_ARCH, DRIVER_VERSION (read) - select the installer file name.
#   DRIVER_BRANCH (written)            - leading component of DRIVER_VERSION.
# Returns:
#   The exit status of the installer (or of mktemp if no scratch directory
#   could be created).
_install_driver() {
  local tmp_dir
  local install_args=()
  local rc=0

  # Assigned separately from 'local' so that a mktemp failure is not masked.
  tmp_dir=$(mktemp -d) || return

  # Specify the --skip-module-load flag for versions of the nvidia-installer that
  # support it. From the nvidia-installer help output:
  #
  # --skip-module-load
  #     Skip the test load of the NVIDIA kernel modules after the modules are built,
  #     and skip loading them after installation is complete.
  #
  # Without this flag, a subtle bug can occur if the nvidia-installer fails to unload
  # the NVIDIA kernel modules after the test load. The modules will remain loaded and
  # any custom NVIDIA module parameters configured as input to the driver container
  # will not be applied.
  #
  DRIVER_BRANCH="${DRIVER_VERSION%%.*}"
  # Only compare numerically when the branch really is a number; otherwise
  # '[ ... -ge ... ]' would emit an "integer expression expected" error.
  if [[ "${DRIVER_BRANCH}" =~ ^[0-9]+$ ]] && [ "${DRIVER_BRANCH}" -ge 550 ]; then
    install_args+=("--skip-module-load")
  fi

  # The '${arr[@]+...}' form expands to nothing for an empty array, which keeps
  # older bash versions from failing under 'set -o nounset'.
  sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" \
    --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd \
    ${install_args[@]+"${install_args[@]}"} || rc=$?

  # Remove the installer scratch directory, then report the installer's status.
  rm -rf -- "${tmp_dir:?}"
  return "${rc}"
}
153+
154+
# For each kernel module configuration file mounted into the container,
# parse the file contents and extract the custom module parameters that
# are to be passed as input to 'modprobe'.
#
# Globals:
#   NVIDIA_MODULE_PARAMS (appended to) - one array element per accepted line.
# Arguments:
#   $1 - optional directory holding the configuration file (default: /drivers).
# Outputs:
#   When the configuration file exists, logs the collected parameters to stdout.
#
# Assumptions:
# - Configuration file is named nvidia.conf
# - Configuration file is mounted inside the container at /drivers.
# - Parameters on the same line are space delimited; each accepted line is kept
#   verbatim as a single element. It is up to the user to properly format the
#   file to ensure the correct set of parameters are passed to 'modprobe'.
# - Blank lines, whitespace-only lines and lines whose first non-blank character
#   is '#' carry no parameter and are skipped.
_get_module_params() {
  local base_path="${1:-/drivers}"
  local conf_file="${base_path}/nvidia.conf"
  # Matches empty / whitespace-only lines and '#' comment lines.
  local skip_re='^[[:space:]]*(#.*)?$'
  local param

  # nvidia
  if [ -f "${conf_file}" ]; then
    # '|| [ -n "$param" ]' keeps a last line that has no trailing newline.
    while IFS="" read -r param || [ -n "$param" ]; do
      # Tolerate CRLF line endings.
      param="${param%$'\r'}"
      if [[ "${param}" =~ ${skip_re} ]]; then
        continue
      fi
      NVIDIA_MODULE_PARAMS+=("$param")
    done <"${conf_file}"

    # Join with a single space for logging, independent of the caller's IFS.
    local IFS=' '
    echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[*]}"
  fi
}
174+
175+
# Collect the custom nvidia module parameters and, when any were provided,
# write them as a single 'options nvidia ...' line to
# ${MODPROBE_CONFIG_DIR}/nvidia.conf.
#
# Globals:
#   NVIDIA_MODULE_PARAMS (read)  - filled in by _get_module_params.
#   MODPROBE_CONFIG_DIR  (read)  - directory receiving nvidia.conf.
# Outputs:
#   Progress messages on stdout. No file is written when no parameters exist.
_create_module_params_conf() {
  local conf_file="${MODPROBE_CONFIG_DIR}/nvidia.conf"

  echo "Parsing kernel module parameters..."
  _get_module_params

  if [ "${#NVIDIA_MODULE_PARAMS[@]}" -gt 0 ]; then
    # Join the array elements with a single space regardless of the caller's IFS.
    local IFS=' '

    echo "Configuring nvidia module parameters in ${conf_file}"
    # Make sure the target directory exists before redirecting into it.
    mkdir -p -- "${MODPROBE_CONFIG_DIR}"
    printf 'options nvidia %s\n' "${NVIDIA_MODULE_PARAMS[*]}" >"${conf_file}"
  fi
}
135184

136-
# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons
185+
# Load NVIDIA driver kernel modules with custom parameters and start vGPU daemons
137186
_load_driver() {
187+
echo "Loading NVIDIA driver kernel modules..."
188+
set -o xtrace +o nounset
189+
modprobe nvidia
190+
modprobe nvidia_vgpu_vfio
191+
set +o xtrace -o nounset
192+
193+
# Start vGPU daemons
138194
/usr/bin/nvidia-vgpud
139195
/usr/bin/nvidia-vgpu-mgr &
140196

@@ -260,6 +316,7 @@ init() {
260316
_install_prerequisites
261317
_create_dev_char_directory
262318
_set_fw_search_path
319+
_create_module_params_conf
263320
_install_driver
264321
_load_driver || exit 1
265322
_mount_rootfs

0 commit comments

Comments
 (0)