Skip to content

Commit f0d0d41

Browse files
Extract common code into reusable helper functions
Signed-off-by: Karthik Vetrivel <kvetrivel@nvidia.com>
1 parent d4a6dff commit f0d0d41

File tree

1 file changed

+91
-137
lines changed

1 file changed

+91
-137
lines changed

ubuntu22.04/nvidia-driver

Lines changed: 91 additions & 137 deletions
Original file line number | Diff line number | Diff line change
@@ -640,62 +640,79 @@ _start_vgpu_topology_daemon() {
640640
}
641641

642642
_ensure_persistenced() {
643-
local pid_file=/var/run/nvidia-persistenced/nvidia-persistenced.pid pid
644-
if pid=$(<"${pid_file}" 2>/dev/null) && [ -n "${pid}" ] && kill -0 "${pid}" 2>/dev/null; then
645-
return 0
646-
fi
643+
local pid_file=/var/run/nvidia-persistenced/nvidia-persistenced.pid pid
644+
if pid=$(<"${pid_file}" 2>/dev/null) && [ -n "${pid}" ] && kill -0 "${pid}" 2>/dev/null; then
645+
return 0
646+
fi
647+
648+
if command -v nvidia-persistenced >/dev/null 2>&1; then
649+
nvidia-persistenced --persistence-mode || true
650+
else
651+
echo "nvidia-persistenced not found; continuing without persistence"
652+
fi
653+
}
647654

648-
if command -v nvidia-persistenced >/dev/null 2>&1; then
649-
nvidia-persistenced --persistence-mode || true
650-
else
651-
echo "nvidia-persistenced not found; continuing without persistence"
652-
fi
655+
# Print the contents of a conf file flattened onto a single line.
#
# Arguments: $1 - path to the conf file (the file may be absent)
# Outputs:   the file's contents on stdout with every newline replaced by a
#            space; nothing at all when $1 is not a regular file
# Returns:   0 when the file is absent; otherwise the exit status of tr
_read_conf_file() {
  local file="$1"

  # An explicit `if` (instead of `[ -f ... ] && tr ...`) keeps a missing file
  # from surfacing as exit status 1. A non-zero status here would abort any
  # caller that runs this as a plain statement or assignment under `set -e`.
  if [ -f "$file" ]; then
    tr '\n' ' ' < "$file"
  fi
}
654659

655660
_build_driver_config() {
656-
local nvidia_params="" nvidia_uvm_params="" nvidia_modeset_params="" nvidia_peermem_params=""
657-
658-
# Read module parameters from conf files
659-
if [ -f "/drivers/nvidia.conf" ]; then
660-
nvidia_params=$(cat "/drivers/nvidia.conf" | tr '\n' ' ')
661-
fi
662-
if [ -f "/drivers/nvidia-uvm.conf" ]; then
663-
nvidia_uvm_params=$(cat "/drivers/nvidia-uvm.conf" | tr '\n' ' ')
664-
fi
665-
if [ -f "/drivers/nvidia-modeset.conf" ]; then
666-
nvidia_modeset_params=$(cat "/drivers/nvidia-modeset.conf" | tr '\n' ' ')
667-
fi
668-
if [ -f "/drivers/nvidia-peermem.conf" ]; then
669-
nvidia_peermem_params=$(cat "/drivers/nvidia-peermem.conf" | tr '\n' ' ')
670-
fi
671-
672-
local config="DRIVER_VERSION=${DRIVER_VERSION}
661+
cat <<EOF
662+
DRIVER_VERSION=${DRIVER_VERSION}
673663
DRIVER_TYPE=${DRIVER_TYPE:-passthrough}
674664
KERNEL_VERSION=$(uname -r)
675665
GPU_DIRECT_RDMA_ENABLED=${GPU_DIRECT_RDMA_ENABLED}
676666
USE_HOST_MOFED=${USE_HOST_MOFED}
677667
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE}
678-
NVIDIA_MODULE_PARAMS=${nvidia_params}
679-
NVIDIA_UVM_MODULE_PARAMS=${nvidia_uvm_params}
680-
NVIDIA_MODESET_MODULE_PARAMS=${nvidia_modeset_params}
681-
NVIDIA_PEERMEM_MODULE_PARAMS=${nvidia_peermem_params}"
668+
NVIDIA_MODULE_PARAMS=$(_read_conf_file /drivers/nvidia.conf)
669+
NVIDIA_UVM_MODULE_PARAMS=$(_read_conf_file /drivers/nvidia-uvm.conf)
670+
NVIDIA_MODESET_MODULE_PARAMS=$(_read_conf_file /drivers/nvidia-modeset.conf)
671+
NVIDIA_PEERMEM_MODULE_PARAMS=$(_read_conf_file /drivers/nvidia-peermem.conf)
672+
EOF
673+
}
682674

683-
# Append config file contents directly
684-
for conf_file in nvidia.conf nvidia-uvm.conf nvidia-modeset.conf nvidia-peermem.conf; do
685-
if [ -f "/drivers/$conf_file" ]; then
686-
config="${config}
687-
$(cat "/drivers/$conf_file")"
688-
fi
689-
done
675+
# Persist the current driver configuration to the state file that init()
# reads back and compares against on a later start.
#
# Outputs: two progress messages on stdout; the output of
#          _build_driver_config is written to the state file (overwriting it).
_store_driver_config() {
  local -r state_path="/run/nvidia/driver-config.state"

  printf '%s\n' "Storing driver configuration state..."
  _build_driver_config > "${state_path}"
  printf '%s\n' "Driver configuration stored at ${state_path}"
}
690681

691-
echo "$config"
682+
# Extract the NVIDIA .run package found in /drivers and run its installer
# with --no-kernel-module, i.e. install the userspace components only.
#
# Globals:   DRIVER_ARCH, DRIVER_VERSION (read)
# Outputs:   progress message on stdout; failure diagnostics on stderr
# Returns:   0 on success, non-zero as soon as any step fails
#
# The working directory is deliberately left inside the extracted package
# directory: the steps that follow in this script use relative paths
# (LICENSE, mkprecompiled, .manifest) from there, so the `cd` calls must not
# be wrapped in a subshell.
_install_userspace_components() {
  local pkg="NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}"

  echo "Installing userspace components (libraries and binaries)..."

  # Each step is checked explicitly. Without the checks, a failed `cd` would
  # let ./nvidia-installer be resolved relative to whatever directory the
  # shell happened to be in.
  cd /drivers || {
    echo "Failed to change directory to /drivers" >&2
    return 1
  }
  sh "${pkg}.run" -x || {
    echo "Failed to extract ${pkg}.run" >&2
    return 1
  }
  cd "${pkg}" || {
    echo "Failed to change directory to ${pkg}" >&2
    return 1
  }

  ./nvidia-installer \
    --silent \
    --no-kernel-module \
    --no-nouveau-check \
    --no-nvidia-modprobe \
    --no-rpms \
    --no-backup \
    --no-check-for-alternate-installs \
    --no-libglx-indirect \
    --no-install-libglvnd \
    --x-prefix=/tmp/null \
    --x-module-path=/tmp/null \
    --x-library-path=/tmp/null \
    --x-sysconfig-path=/tmp/null
}
693702

694-
_store_driver_config() {
695-
local config_file="/run/nvidia/driver-config.state"
696-
echo "Storing driver configuration state..."
697-
_build_driver_config > "$config_file"
698-
echo "Driver configuration stored at $config_file"
703+
# Copy the kernel module sources from the current directory into
# /usr/src/nvidia-${DRIVER_VERSION} and write a filtered .manifest next to
# them.
#
# Globals:   DRIVER_VERSION, KERNEL_TYPE (read)
# Returns:   0 on success, non-zero as soon as any step fails
#
# Relies on the current working directory containing LICENSE, mkprecompiled,
# the ${KERNEL_TYPE} entry and .manifest (all referenced by relative path).
_copy_kernel_module_sources() {
  local dest="/usr/src/nvidia-${DRIVER_VERSION}"

  # Fail fast: do not copy into, or write a manifest for, a destination that
  # could not be prepared.
  mkdir -p "${dest}" || return 1
  # NOTE(review): KERNEL_TYPE is quoted on the assumption that it names a
  # single entry in the current directory - confirm against
  # _resolve_kernel_type.
  cp -r LICENSE mkprecompiled "${KERNEL_TYPE}" "${dest}/" || return 1
  # From line 9 of .manifest onward, delete every line that does not start
  # with "kernel" or "LICENSE"; lines 1-8 pass through unchanged.
  sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > "${dest}/.manifest"
}
708+
709+
# Park the script until one of HUP, INT, QUIT, PIPE or TERM is received.
#
# A background `sleep infinity` gives the shell something to wait on. The
# signal handler prints 'Caught signal' and runs _shutdown; only when
# _shutdown succeeds does it kill the sleeper and exit 0. Otherwise the
# handler returns and the wait loop resumes.
#
# Outputs: "Done, now waiting for signal" on stdout.
_wait_for_signal() {
  echo "Done, now waiting for signal"

  sleep infinity &

  # Double quotes are intentional: $! is expanded right here, so the PID of
  # the sleeper started above is baked into the handler text.
  trap "echo 'Caught signal'; _shutdown && { kill $!; exit 0; }" HUP INT QUIT PIPE TERM

  # Drop any EXIT trap - presumably so _shutdown is not run a second time
  # when the handler above exits; confirm against the trap set in init().
  trap - EXIT

  # `wait` returns non-zero when interrupted by a trapped signal; in that
  # case simply go back to waiting.
  while :; do
    if ! wait $!; then
      continue
    fi
  done

  # The loop above has no break, so control only leaves through the handler.
  exit 0
}
700717

701718
init() {
@@ -716,96 +733,41 @@ init() {
716733
trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
717734
trap "_shutdown" EXIT
718735

719-
# Fast path: if the NVIDIA kernel modules are already loaded and driver config matches,
720-
# skip kernel module build/load but install userspace components.
721-
# This handles non-clean restarts where modules are in use and can't be unloaded.
722-
if [ -f /sys/module/nvidia/refcnt ] && [ -f /run/nvidia/driver-config.state ]; then
723-
current_config=$(_build_driver_config)
724-
stored_config=$(cat /run/nvidia/driver-config.state)
725-
726-
if [ "${current_config}" = "${stored_config}" ]; then
727-
echo "Detected matching loaded driver & config (${DRIVER_VERSION}); performing userspace-only install"
728-
729-
# Skip kernel module unload since they're already loaded with correct version
730-
# Unmount any existing rootfs
731-
_unmount_rootfs
732-
733-
# Update package cache for userspace install
734-
_update_package_cache
735-
_resolve_kernel_version || exit 1
736-
_install_prerequisites
737-
738-
# Install userspace components only (libraries, binaries)
739-
# The --no-kernel-module flag tells nvidia-installer to skip kernel module build/install
740-
echo "Installing userspace components (libraries and binaries)..."
741-
cd /drivers
742-
# Extract the driver first
743-
sh NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run -x
744-
cd NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}
745-
./nvidia-installer \
746-
--silent \
747-
--no-kernel-module \
748-
--no-nouveau-check \
749-
--no-nvidia-modprobe \
750-
--no-drm \
751-
--no-peermem
752-
753-
# Determine the kernel module type
754-
_resolve_kernel_type || exit 1
755-
756-
# Copy the kernel module sources for sidecar containers (gdrcopy, nvidia-fs, etc.)
757-
mkdir -p /usr/src/nvidia-${DRIVER_VERSION} && \
758-
cp -r LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-${DRIVER_VERSION}/ && \
759-
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-${DRIVER_VERSION}/.manifest
760-
761-
# Mount the driver rootfs to make components available
762-
_mount_rootfs
763-
764-
# Ensure persistence daemon is running
765-
_ensure_persistenced
766-
767-
# Write kernel update hook
768-
_write_kernel_update_hook
769-
770-
# Store driver configuration
771-
_store_driver_config
772-
773-
echo "Userspace-only install complete, now waiting for signal"
774-
sleep infinity &
775-
trap "echo 'Caught signal'; _shutdown && { kill $!; exit 0; }" HUP INT QUIT PIPE TERM
776-
trap - EXIT
777-
while true; do wait $! || continue; done
778-
exit 0
779-
fi
780-
fi
736+
# Fast path: if NVIDIA kernel modules are already loaded and config matches,
737+
# skip kernel module build/load and only reinstall userspace components.
738+
# This handles non-clean restarts where modules are in use and can't be unloaded.
739+
if [ -f /sys/module/nvidia/refcnt ] && [ -f /run/nvidia/driver-config.state ]; then
740+
current_config=$(_build_driver_config)
741+
stored_config=$(cat /run/nvidia/driver-config.state)
742+
743+
if [ "${current_config}" = "${stored_config}" ]; then
744+
echo "Detected matching loaded driver & config (${DRIVER_VERSION}); performing userspace-only install"
745+
_unmount_rootfs
746+
_update_package_cache
747+
_resolve_kernel_version || exit 1
748+
_install_prerequisites
749+
_install_userspace_components
750+
_resolve_kernel_type || exit 1
751+
_copy_kernel_module_sources
752+
_mount_rootfs
753+
_ensure_persistenced
754+
_write_kernel_update_hook
755+
_store_driver_config
756+
echo "Userspace-only install complete"
757+
_wait_for_signal
758+
fi
759+
fi
781760

761+
# Full install path: unload existing driver and perform complete installation
782762
_unload_driver || exit 1
783763
_unmount_rootfs
784-
785-
# Install the userspace components
786-
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
787-
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
788-
./nvidia-installer --silent \
789-
--no-kernel-module \
790-
--no-nouveau-check \
791-
--no-nvidia-modprobe \
792-
--no-rpms \
793-
--no-backup \
794-
--no-check-for-alternate-installs \
795-
--no-libglx-indirect \
796-
--no-install-libglvnd \
797-
--x-prefix=/tmp/null \
798-
--x-module-path=/tmp/null \
799-
--x-library-path=/tmp/null \
800-
--x-sysconfig-path=/tmp/null
801-
802-
# Determine the kernel module type
764+
_install_userspace_components
803765
_resolve_kernel_type || exit 1
804766

805-
# Copy the kernel module sources
806-
mkdir -p /usr/src/nvidia-${DRIVER_VERSION} && \
807-
mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-${DRIVER_VERSION} && \
808-
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-${DRIVER_VERSION}/.manifest
767+
# Move (not copy) kernel module sources since this is the full install path
768+
mkdir -p /usr/src/nvidia-${DRIVER_VERSION}
769+
mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-${DRIVER_VERSION}/
770+
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-${DRIVER_VERSION}/.manifest
809771

810772
if _kernel_requires_package; then
811773
_update_ca_certificates
@@ -814,22 +776,14 @@ init() {
814776
_resolve_kernel_version || exit 1
815777
_install_prerequisites
816778
_create_driver_package
817-
#_remove_prerequisites
818-
#_cleanup_package_cache
819779
fi
820780

821781
_install_driver
822782
_load_driver || exit 1
823783
_mount_rootfs
824784
_write_kernel_update_hook
825785
_store_driver_config
826-
827-
echo "Done, now waiting for signal"
828-
sleep infinity &
829-
trap "echo 'Caught signal'; _shutdown && { kill $!; exit 0; }" HUP INT QUIT PIPE TERM
830-
trap - EXIT
831-
while true; do wait $! || continue; done
832-
exit 0
786+
_wait_for_signal
833787
}
834788

835789
update() {

0 commit comments

Comments
 (0)