@@ -8,12 +8,13 @@ PID_FILE=${RUN_DIR}/${0##*/}.pid
88DRIVER_VERSION=${DRIVER_VERSION:? " Missing DRIVER_VERSION env" }
99KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver
1010NUM_VGPU_DEVICES=0
11+ GPU_DIRECT_RDMA_ENABLED=" ${GPU_DIRECT_RDMA_ENABLED:- false} "
12+ USE_HOST_MOFED=" ${USE_HOST_MOFED:- false} "
1113NVIDIA_MODULE_PARAMS=()
1214NVIDIA_UVM_MODULE_PARAMS=()
1315NVIDIA_MODESET_MODULE_PARAMS=()
1416NVIDIA_PEERMEM_MODULE_PARAMS=()
1517TARGETARCH=${TARGETARCH:? " Missing TARGETARCH env" }
16- USE_HOST_MOFED=" ${USE_HOST_MOFED:- false} "
1718DNF_RELEASEVER=${DNF_RELEASEVER:- " " }
1819RHEL_VERSION=${RHEL_VERSION:- " " }
1920RHEL_MAJOR_VERSION=9
@@ -211,7 +212,10 @@ _create_driver_package() (
211212 local nvidia_modeset_sign_args=" "
212213 local nvidia_uvm_sign_args=" "
213214
214- trap " make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION} /build clean > /dev/null" EXIT
215+ # Skip cleanup trap for DTK builds - modules are copied after this function returns
216+ if [ " ${PACKAGE_TAG:- } " != " builtin" ]; then
217+ trap " make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION} /build clean > /dev/null" EXIT
218+ fi
215219
216220 echo " Compiling NVIDIA driver kernel modules..."
217221 cd /usr/src/nvidia-${DRIVER_VERSION} /${KERNEL_TYPE}
@@ -566,7 +570,9 @@ _install_driver() {
566570 install_args+=(" --skip-module-load" )
567571 fi
568572
569- IGNORE_CC_MISMATCH=1 nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+" ${install_args[@]} " }
573+ # Prevent prompts when modules are already loaded (common in DTK context).
574+ # Pipe "1" to auto-answer "Continue installation" when prompted about loaded modules.
575+ echo " 1" | IGNORE_CC_MISMATCH=1 nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+" ${install_args[@]} " }
570576 # May need to add no-cc-check for Rhel, otherwise it complains about cc missing in path
571577 # /proc/version and lib/modules/KERNEL_VERSION/proc are different, by default installer looks at /proc/ so, added the proc-mount-point
572578 # TODO: remove the -a flag; it's not needed. In the new driver version, license acceptance is implicit.
@@ -701,6 +707,114 @@ _start_vgpu_topology_daemon() {
701707 nvidia-topologyd
702708}
703709
# Start nvidia-persistenced unless a live instance is already recorded in its
# PID file.
#
# Arguments:
#   $1 - (optional) path to the daemon's PID file; defaults to
#        /var/run/nvidia-persistenced/nvidia-persistenced.pid
# Outputs:
#   A notice on stdout when the nvidia-persistenced binary is not available.
# Returns:
#   0 in all handled cases; a failed daemon start or a missing binary is
#   tolerated on purpose.
_ensure_persistence() {
    local pid_file=${1:-/var/run/nvidia-persistenced/nvidia-persistenced.pid}
    local pid=""

    # Bash only treats $(<file) as "print the file" when that input
    # redirection is the sole content of the substitution; adding 2>/dev/null
    # inside $( ) turns it into an empty command that yields no output.
    # Guard with a readability test instead of silencing stderr.
    if [ -r "${pid_file}" ]; then
        pid=$(<"${pid_file}") || pid=""
    fi

    # Discard anything that is not a plain decimal PID so that values such as
    # "-1" or stray text are never handed to kill.
    case "${pid}" in
        '' | *[!0-9]*) pid="" ;;
    esac

    # kill -0 delivers no signal; it only reports whether the process exists.
    if [ -n "${pid}" ] && kill -0 "${pid}" 2>/dev/null; then
        return 0
    fi

    if command -v nvidia-persistenced >/dev/null 2>&1; then
        # Best effort: a failure to start the daemon must not abort the caller.
        nvidia-persistenced --persistence-mode || true
    else
        echo "nvidia-persistenced not found; continuing without persistence"
    fi
}
722+
# Print the contents of module conf file $1 flattened onto a single line
# (every newline becomes a space). Prints nothing when the file is absent.
_flatten_module_conf() {
    if [ -f "$1" ]; then
        tr '\n' ' ' < "$1"
    fi
}

# Emit a textual snapshot of the driver configuration on stdout.
#
# The snapshot consists of KEY=VALUE lines for the driver version, the running
# kernel (uname -r), the RDMA / MOFED / module-type settings and the flattened
# module parameters, followed by the raw contents of every module conf file
# that exists.
#
# Globals (read):
#   DRIVER_VERSION, GPU_DIRECT_RDMA_ENABLED (default "false"),
#   USE_HOST_MOFED (default "false"), KERNEL_MODULE_TYPE (default "auto")
# Arguments:
#   $1 - (optional) directory holding the nvidia*.conf files; defaults to
#        /drivers
# Outputs:
#   The configuration snapshot, terminated by a single newline.
_build_driver_config() {
    local conf_dir=${1:-/drivers}
    # Keep the loop variable local so it does not leak into the caller.
    local conf_file

    printf '%s\n' \
        "DRIVER_VERSION=${DRIVER_VERSION}" \
        "KERNEL_VERSION=$(uname -r)" \
        "GPU_DIRECT_RDMA_ENABLED=${GPU_DIRECT_RDMA_ENABLED:-false}" \
        "USE_HOST_MOFED=${USE_HOST_MOFED:-false}" \
        "KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}" \
        "NVIDIA_MODULE_PARAMS=$(_flatten_module_conf "${conf_dir}/nvidia.conf")" \
        "NVIDIA_UVM_MODULE_PARAMS=$(_flatten_module_conf "${conf_dir}/nvidia-uvm.conf")" \
        "NVIDIA_MODESET_MODULE_PARAMS=$(_flatten_module_conf "${conf_dir}/nvidia-modeset.conf")" \
        "NVIDIA_PEERMEM_MODULE_PARAMS=$(_flatten_module_conf "${conf_dir}/nvidia-peermem.conf")"

    # Append each existing conf file verbatim; the command substitution strips
    # trailing newlines so every file contributes exactly one trailing newline.
    for conf_file in nvidia.conf nvidia-uvm.conf nvidia-modeset.conf nvidia-peermem.conf; do
        if [ -f "${conf_dir}/${conf_file}" ]; then
            printf '%s\n' "$(cat "${conf_dir}/${conf_file}")"
        fi
    done
}
760+
# Write the snapshot produced by _build_driver_config to
# /run/nvidia/driver-config.state, announcing the step before and after.
_store_driver_config() {
    local state_path=/run/nvidia/driver-config.state

    printf '%s\n' "Storing driver configuration state..."
    # Redirect the whole snapshot into the state file, replacing any old one.
    _build_driver_config >"${state_path}"
    printf '%s\n' "Driver configuration stored at ${state_path}"
}
767+
# Decide whether the fast path applies.
#
# Returns 0 only when /sys/module/nvidia/refcnt exists, a stored state file
# exists, and the stored snapshot equals the one _build_driver_config
# produces now. Returns non-zero otherwise.
_should_use_fast_path() {
    local state_path=/run/nvidia/driver-config.state

    # Guard clause: both the module's sysfs entry and the state file are
    # required before any comparison is attempted.
    if [ ! -f /sys/module/nvidia/refcnt ] || [ ! -f "${state_path}" ]; then
        return 1
    fi

    local live_snapshot=$(_build_driver_config)
    # An unreadable state file is treated as an empty snapshot.
    local saved_snapshot=$(cat "${state_path}" 2>/dev/null || true)

    [ "${live_snapshot}" = "${saved_snapshot}" ]
}
774+
# Install only the userspace driver components (libraries and binaries),
# without building or loading kernel modules.
#
# Steps: unmount the rootfs, refresh the package cache, extract the .run
# package under /drivers if not yet extracted, run the bundled
# nvidia-installer with kernel-module handling disabled, copy the kernel
# module sources to /usr/src/nvidia-<version> when missing, then remount the
# rootfs, ensure persistence, write the kernel update hook and store the
# driver configuration state.
#
# Globals (read):
#   DRIVER_ARCH, DRIVER_VERSION, ACCEPT_LICENSE,
#   KERNEL_TYPE (presumably set by _resolve_kernel_type; confirm there)
# Exits:
#   1 when _resolve_kernel_type fails.
_userspace_only_install() {
    local installer_dir="NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}"
    local src_dir="/usr/src/nvidia-${DRIVER_VERSION}"
    # Build the installer command line as an array so no argument is ever
    # subject to word splitting or globbing.
    local -a install_args=(
        --silent
        --no-kernel-module
        --no-nouveau-check
        --no-nvidia-modprobe
        --no-drm
        --no-peermem
        --ui=none
    )

    echo "Detected matching loaded driver & config (${DRIVER_VERSION}); performing userspace-only install"

    _unmount_rootfs
    _update_package_cache

    # Skip kernel-related steps for userspace-only install
    # KERNEL_VERSION is already set from uname -r, no need to resolve from yum
    # Kernel headers/devel/modules are not needed for userspace-only install

    cd /drivers
    # Extract the self-extracting package only once.
    if [ ! -d "${installer_dir}" ]; then
        sh "${installer_dir}.run" -x
    fi
    cd "${installer_dir}"

    echo "DEBUG: Current directory: $(pwd)"
    echo "DEBUG: Checking for ./nvidia-installer:"
    ls -la ./nvidia-installer 2>&1 || echo "./nvidia-installer NOT FOUND"
    echo "DEBUG: Checking PATH for nvidia-installer:"
    # command -v is a shell builtin, so this works even where the external
    # "which" utility is not installed.
    command -v nvidia-installer 2>&1 || echo "nvidia-installer NOT in PATH"

    echo "Installing userspace components (libraries and binaries)..."
    if [ "${ACCEPT_LICENSE:-}" = "yes" ]; then
        install_args+=(--accept-license)
    fi
    IGNORE_CC_MISMATCH=1 ./nvidia-installer "${install_args[@]}"

    # Copy kernel module sources if not already present (needed for other containers)
    if [ ! -d "${src_dir}" ]; then
        _resolve_kernel_type || exit 1
        mkdir -p "${src_dir}"
        cp -r LICENSE mkprecompiled "${KERNEL_TYPE}" "${src_dir}/"
        # From line 9 onward keep only manifest entries starting with
        # "kernel" or "LICENSE"; the first 8 lines are kept unchanged.
        sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > "${src_dir}/.manifest"
    fi

    _mount_rootfs
    _ensure_persistence
    _write_kernel_update_hook
    _store_driver_config

    echo "Userspace-only install complete"
}
817+
704818_prepare () {
705819 if [ " ${DRIVER_TYPE} " = " vgpu" ]; then
706820 _find_vgpu_driver_version || exit 1
@@ -758,6 +872,7 @@ _load() {
758872 _load_driver
759873 _mount_rootfs
760874 _write_kernel_update_hook
875+ _store_driver_config
761876
762877 echo " Done, now waiting for signal"
763878 sleep infinity &
@@ -768,7 +883,49 @@ _load() {
768883}
769884
770885init () {
771- _prepare_exclusive
886+ if [ " ${DRIVER_TYPE} " = " vgpu" ]; then
887+ _find_vgpu_driver_version || exit 1
888+ fi
889+
890+ echo -e " \n========== NVIDIA Software Installer ==========\n"
891+ echo -e " Starting installation of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION} \n"
892+
893+ exec 3> ${PID_FILE}
894+ if ! flock -n 3; then
895+ echo " An instance of the NVIDIA driver is already running, aborting"
896+ exit 1
897+ fi
898+ echo $$ >&3
899+
900+ trap " echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
901+ trap " _shutdown" EXIT
902+
903+ if _should_use_fast_path; then
904+ _userspace_only_install
905+
906+ echo " Userspace-only install complete, now waiting for signal"
907+ sleep infinity &
908+ trap " echo 'Caught signal'; _shutdown && { kill $! ; exit 0; }" HUP INT QUIT PIPE TERM
909+ trap - EXIT
910+ while true ; do wait $! || continue ; done
911+ exit 0
912+ fi
913+
914+ _unload_driver || exit 1
915+ _unmount_rootfs
916+
917+ # Install the userspace components and copy the kernel module sources.
918+ sh NVIDIA-Linux-$DRIVER_ARCH -$DRIVER_VERSION .run -x && \
919+ cd NVIDIA-Linux-$DRIVER_ARCH -$DRIVER_VERSION && \
920+ sh /tmp/install.sh nvinstall
921+
922+ # Determine the kernel module type
923+ _resolve_kernel_type || exit 1
924+
925+ # Copy the kernel module sources
926+ mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
927+ mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-$DRIVER_VERSION && \
928+ sed ' 9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-$DRIVER_VERSION /.manifest
772929
773930 _build
774931
0 commit comments