Skip to content

Commit bf0bb88

Browse files
Add support for OpenShift 14.04
Signed-off-by: Karthik Vetrivel <kvetrivel@nvidia.com>
1 parent ba7e6de commit bf0bb88

File tree

3 files changed

+235
-7
lines changed

3 files changed

+235
-7
lines changed

rhel9/nvidia-driver

100755 → 100644
Lines changed: 161 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@ PID_FILE=${RUN_DIR}/${0##*/}.pid
88
DRIVER_VERSION=${DRIVER_VERSION:?"Missing DRIVER_VERSION env"}
99
KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver
1010
NUM_VGPU_DEVICES=0
11+
GPU_DIRECT_RDMA_ENABLED="${GPU_DIRECT_RDMA_ENABLED:-false}"
12+
USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
1113
NVIDIA_MODULE_PARAMS=()
1214
NVIDIA_UVM_MODULE_PARAMS=()
1315
NVIDIA_MODESET_MODULE_PARAMS=()
1416
NVIDIA_PEERMEM_MODULE_PARAMS=()
1517
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
16-
USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
1718
DNF_RELEASEVER=${DNF_RELEASEVER:-""}
1819
RHEL_VERSION=${RHEL_VERSION:-""}
1920
RHEL_MAJOR_VERSION=9
@@ -211,7 +212,10 @@ _create_driver_package() (
211212
local nvidia_modeset_sign_args=""
212213
local nvidia_uvm_sign_args=""
213214

214-
trap "make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION}/build clean > /dev/null" EXIT
215+
# Skip cleanup trap for DTK builds - modules are copied after this function returns
216+
if [ "${PACKAGE_TAG:-}" != "builtin" ]; then
217+
trap "make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION}/build clean > /dev/null" EXIT
218+
fi
215219

216220
echo "Compiling NVIDIA driver kernel modules..."
217221
cd /usr/src/nvidia-${DRIVER_VERSION}/${KERNEL_TYPE}
@@ -566,7 +570,9 @@ _install_driver() {
566570
install_args+=("--skip-module-load")
567571
fi
568572

569-
IGNORE_CC_MISMATCH=1 nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+"${install_args[@]}"}
573+
# Prevent prompts when modules are already loaded (common in DTK context).
574+
# Pipe "1" to auto-answer "Continue installation" when prompted about loaded modules.
575+
echo "1" | IGNORE_CC_MISMATCH=1 nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+"${install_args[@]}"}
570576
# May need to add no-cc-check for Rhel, otherwise it complains about cc missing in path
571577
# /proc/version and lib/modules/KERNEL_VERSION/proc are different, by default installer looks at /proc/ so, added the proc-mount-point
572578
# TODO: remove the -a flag. its not needed. in the new driver version, license-acceptance is implicit
@@ -701,6 +707,114 @@ _start_vgpu_topology_daemon() {
701707
nvidia-topologyd
702708
}
703709

710+
# Make sure the NVIDIA persistence daemon is running.
#
# If the pid file names a live process nothing is done; otherwise
# nvidia-persistenced is started in persistence mode when the binary is
# available. Failing to start the daemon is deliberately non-fatal.
#
# Arguments:
#   $1 - (optional) path of the daemon pid file; defaults to
#        /var/run/nvidia-persistenced/nvidia-persistenced.pid
# Outputs:
#   A notice on stdout when nvidia-persistenced is not installed.
# Returns:
#   0 in every case (best-effort).
_ensure_persistence() {
    local pid_file="${1:-/var/run/nvidia-persistenced/nvidia-persistenced.pid}"
    local pid=""

    if [ -r "${pid_file}" ]; then
        # Read with cat rather than $(<file 2>/dev/null): bash only applies the
        # $(<file) shortcut when that redirection stands alone, so adding a
        # second redirection can make the substitution expand to nothing and
        # the "already running" check would never succeed.
        pid=$(cat -- "${pid_file}" 2>/dev/null) || pid=""
    fi

    # Only probe values that look like a PID; kill -0 tests for existence
    # without delivering a signal.
    if [[ "${pid}" =~ ^[0-9]+$ ]] && kill -0 "${pid}" 2>/dev/null; then
        return 0
    fi

    if command -v nvidia-persistenced >/dev/null 2>&1; then
        # Best effort: a failed start must not abort the caller.
        nvidia-persistenced --persistence-mode || true
    else
        echo "nvidia-persistenced not found; continuing without persistence"
    fi
}
722+
723+
# Print a textual fingerprint of the driver configuration on stdout.
#
# The fingerprint is a list of KEY=VALUE lines (driver version, running kernel
# from `uname -r`, RDMA/MOFED/module-type settings and the per-module
# parameters flattened onto one line) followed by the raw contents of every
# module .conf file that exists. _should_use_fast_path compares this text
# verbatim against the stored state file, so the output format must stay
# byte-for-byte stable.
#
# Globals (read): DRIVER_VERSION, GPU_DIRECT_RDMA_ENABLED, USE_HOST_MOFED,
#                 KERNEL_MODULE_TYPE
# Arguments:
#   $1 - (optional) directory holding nvidia*.conf; defaults to /drivers
# Outputs:
#   The configuration fingerprint, terminated by a newline.
_build_driver_config() {
    local conf_dir="${1:-/drivers}"
    local -a modules=(nvidia nvidia-uvm nvidia-modeset nvidia-peermem)
    local -a flat_params=("" "" "" "")
    local i conf_path config

    # Flatten each module's parameter file onto a single line. tr reads the
    # file directly so the trailing newline becomes a trailing space, exactly
    # as the previous `cat | tr` pipeline produced.
    for i in "${!modules[@]}"; do
        conf_path="${conf_dir}/${modules[i]}.conf"
        if [ -f "${conf_path}" ]; then
            flat_params[i]=$(tr '\n' ' ' < "${conf_path}")
        fi
    done

    printf -v config '%s\n' \
        "DRIVER_VERSION=${DRIVER_VERSION}" \
        "KERNEL_VERSION=$(uname -r)" \
        "GPU_DIRECT_RDMA_ENABLED=${GPU_DIRECT_RDMA_ENABLED:-false}" \
        "USE_HOST_MOFED=${USE_HOST_MOFED:-false}" \
        "KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}" \
        "NVIDIA_MODULE_PARAMS=${flat_params[0]}" \
        "NVIDIA_UVM_MODULE_PARAMS=${flat_params[1]}" \
        "NVIDIA_MODESET_MODULE_PARAMS=${flat_params[2]}" \
        "NVIDIA_PEERMEM_MODULE_PARAMS=${flat_params[3]}"
    # printf terminated every line; drop the last newline so that appended
    # sections are each introduced by exactly one newline.
    config=${config%$'\n'}

    # Append config file contents directly
    for i in "${!modules[@]}"; do
        conf_path="${conf_dir}/${modules[i]}.conf"
        if [ -f "${conf_path}" ]; then
            config+=$'\n'"$(cat -- "${conf_path}")"
        fi
    done

    printf '%s\n' "${config}"
}
760+
761+
# Persist the current driver configuration fingerprint to the state file so a
# later start can compare against it.
#
# The fingerprint is built completely before the state file is opened, so a
# failed build never truncates or half-writes an existing state file, and
# success is only reported when the write actually succeeded.
# Trailing newlines of the fingerprint are normalised to exactly one.
#
# Arguments:
#   $1 - (optional) state file path; defaults to
#        /run/nvidia/driver-config.state
# Outputs:
#   Progress messages on stdout, failure messages on stderr.
# Returns:
#   0 on success, 1 when the fingerprint could not be built or written.
_store_driver_config() {
    local config_file="${1:-/run/nvidia/driver-config.state}"
    local config

    echo "Storing driver configuration state..."

    if ! config=$(_build_driver_config); then
        echo "Failed to build driver configuration; state not stored" >&2
        return 1
    fi

    if ! printf '%s\n' "${config}" > "${config_file}"; then
        echo "Failed to write driver configuration to ${config_file}" >&2
        return 1
    fi

    echo "Driver configuration stored at $config_file"
}
767+
768+
# Decide whether the userspace-only "fast path" may be used.
#
# The fast path is allowed only when the nvidia kernel module is loaded (its
# refcnt file exists), a stored configuration state file exists, and the
# freshly built configuration fingerprint is identical to the stored one.
# Any failure to build or read a fingerprint counts as "no".
#
# Arguments:
#   $1 - (optional) stored state file; defaults to
#        /run/nvidia/driver-config.state
#   $2 - (optional) module refcnt file; defaults to
#        /sys/module/nvidia/refcnt
# Returns:
#   0 when the fast path can be used, non-zero otherwise.
_should_use_fast_path() {
    local state_file="${1:-/run/nvidia/driver-config.state}"
    local refcnt_file="${2:-/sys/module/nvidia/refcnt}"
    local current_config stored_config

    [ -f "${refcnt_file}" ] || return 1
    [ -f "${state_file}" ] || return 1

    # Declared separately from the assignment so a failing command is not
    # masked by the exit status of `local`.
    current_config=$(_build_driver_config) || return 1
    stored_config=$(cat -- "${state_file}" 2>/dev/null) || return 1

    # An empty fingerprint is never a valid match.
    [ -n "${current_config}" ] && [ "${current_config}" = "${stored_config}" ]
}
774+
775+
# Install only the userspace part of the driver (no kernel module build/load).
#
# Steps, in order: unmount the rootfs, refresh the package cache, extract the
# NVIDIA .run package under /drivers if it has not been extracted yet, run the
# bundled nvidia-installer with --no-kernel-module, copy the kernel module
# sources to /usr/src/nvidia-<version> when they are missing, then remount the
# rootfs, ensure persistence, write the kernel update hook and store the
# driver configuration state.
#
# Globals (read): DRIVER_VERSION, DRIVER_ARCH, ACCEPT_LICENSE, KERNEL_TYPE
#                 (KERNEL_TYPE is expected to be set by _resolve_kernel_type)
# Exits the script with status 1 when the working directory cannot be entered,
# the package cannot be extracted, or the kernel type cannot be resolved.
_userspace_only_install() {
    echo "Detected matching loaded driver & config (${DRIVER_VERSION}); performing userspace-only install"

    _unmount_rootfs
    _update_package_cache

    # Skip kernel-related steps for userspace-only install
    # KERNEL_VERSION is already set from uname -r, no need to resolve from yum
    # Kernel headers/devel/modules are not needed for userspace-only install

    local installer_dir="NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}"
    local src_dir="/usr/src/nvidia-${DRIVER_VERSION}"

    # Every later path is relative, so a failed cd must stop the install
    # instead of running the installer from the wrong directory.
    cd /drivers || { echo "Cannot enter /drivers" >&2; exit 1; }
    if [ ! -d "${installer_dir}" ]; then
        sh "${installer_dir}.run" -x || exit 1
    fi
    cd "${installer_dir}" || { echo "Cannot enter /drivers/${installer_dir}" >&2; exit 1; }

    echo "DEBUG: Current directory: $(pwd)"
    echo "DEBUG: Checking for ./nvidia-installer:"
    ls -la ./nvidia-installer 2>&1 || echo " ./nvidia-installer NOT FOUND"
    echo "DEBUG: Checking PATH for nvidia-installer:"
    # command -v is a shell builtin; `which` may be missing from minimal
    # images, which would wrongly report the installer as absent.
    command -v nvidia-installer 2>&1 || echo " nvidia-installer NOT in PATH"

    echo "Installing userspace components (libraries and binaries)..."
    # Keep the options in an array so each one is passed as its own argument.
    local -a install_args=(
        --silent
        --no-kernel-module
        --no-nouveau-check
        --no-nvidia-modprobe
        --no-drm
        --no-peermem
        --ui=none
    )
    if [ "${ACCEPT_LICENSE:-}" = "yes" ]; then
        install_args+=(--accept-license)
    fi
    IGNORE_CC_MISMATCH=1 ./nvidia-installer "${install_args[@]}"

    # Copy kernel module sources if not already present (needed for other containers)
    if [ ! -d "${src_dir}" ]; then
        _resolve_kernel_type || exit 1
        mkdir -p "${src_dir}"
        cp -r LICENSE mkprecompiled "${KERNEL_TYPE}" "${src_dir}/"
        # From line 9 of the manifest onwards, keep only entries that start
        # with "kernel" or "LICENSE".
        sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > "${src_dir}/.manifest"
    fi

    _mount_rootfs
    _ensure_persistence
    _write_kernel_update_hook
    _store_driver_config

    echo "Userspace-only install complete"
}
817+
704818
_prepare() {
705819
if [ "${DRIVER_TYPE}" = "vgpu" ]; then
706820
_find_vgpu_driver_version || exit 1
@@ -758,6 +872,7 @@ _load() {
758872
_load_driver
759873
_mount_rootfs
760874
_write_kernel_update_hook
875+
_store_driver_config
761876

762877
echo "Done, now waiting for signal"
763878
sleep infinity &
@@ -768,7 +883,49 @@ _load() {
768883
}
769884

770885
init() {
771-
_prepare_exclusive
886+
if [ "${DRIVER_TYPE}" = "vgpu" ]; then
887+
_find_vgpu_driver_version || exit 1
888+
fi
889+
890+
echo -e "\n========== NVIDIA Software Installer ==========\n"
891+
echo -e "Starting installation of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION}\n"
892+
893+
exec 3> ${PID_FILE}
894+
if ! flock -n 3; then
895+
echo "An instance of the NVIDIA driver is already running, aborting"
896+
exit 1
897+
fi
898+
echo $$ >&3
899+
900+
trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
901+
trap "_shutdown" EXIT
902+
903+
if _should_use_fast_path; then
904+
_userspace_only_install
905+
906+
echo "Userspace-only install complete, now waiting for signal"
907+
sleep infinity &
908+
trap "echo 'Caught signal'; _shutdown && { kill $!; exit 0; }" HUP INT QUIT PIPE TERM
909+
trap - EXIT
910+
while true; do wait $! || continue; done
911+
exit 0
912+
fi
913+
914+
_unload_driver || exit 1
915+
_unmount_rootfs
916+
917+
# Install the userspace components and copy the kernel module sources.
918+
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
919+
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
920+
sh /tmp/install.sh nvinstall
921+
922+
# Determine the kernel module type
923+
_resolve_kernel_type || exit 1
924+
925+
# Copy the kernel module sources
926+
mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
927+
mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-$DRIVER_VERSION && \
928+
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-$DRIVER_VERSION/.manifest
772929

773930
_build
774931

rhel9/ocp_dtk_entrypoint

100755 → 100644
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,50 @@ echo "Running $*"
1010
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
1111
source $SCRIPT_DIR/common.sh
1212

13+
# Print a textual fingerprint of the driver configuration on stdout.
#
# The fingerprint is a list of KEY=VALUE lines (driver version, running kernel
# from `uname -r`, RDMA/MOFED/module-type settings and the per-module
# parameters flattened onto one line) followed by the raw contents of every
# module .conf file that exists. The result is compared verbatim against
# /run/nvidia/driver-config.state by _should_use_fast_path, so the output
# format must stay byte-for-byte stable.
#
# Globals (read): DRIVER_VERSION, GPU_DIRECT_RDMA_ENABLED, USE_HOST_MOFED,
#                 KERNEL_MODULE_TYPE
# Arguments:
#   $1 - (optional) directory holding nvidia*.conf; defaults to /drivers
# Outputs:
#   The configuration fingerprint, terminated by a newline.
_build_driver_config() {
    local conf_dir="${1:-/drivers}"
    local -a modules=(nvidia nvidia-uvm nvidia-modeset nvidia-peermem)
    local -a flat_params=("" "" "" "")
    local i conf_path config

    # Flatten each module's parameter file onto a single line. tr reads the
    # file directly so the trailing newline becomes a trailing space, exactly
    # as the previous `cat | tr` pipeline produced.
    for i in "${!modules[@]}"; do
        conf_path="${conf_dir}/${modules[i]}.conf"
        if [ -f "${conf_path}" ]; then
            flat_params[i]=$(tr '\n' ' ' < "${conf_path}")
        fi
    done

    printf -v config '%s\n' \
        "DRIVER_VERSION=${DRIVER_VERSION}" \
        "KERNEL_VERSION=$(uname -r)" \
        "GPU_DIRECT_RDMA_ENABLED=${GPU_DIRECT_RDMA_ENABLED:-false}" \
        "USE_HOST_MOFED=${USE_HOST_MOFED:-false}" \
        "KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}" \
        "NVIDIA_MODULE_PARAMS=${flat_params[0]}" \
        "NVIDIA_UVM_MODULE_PARAMS=${flat_params[1]}" \
        "NVIDIA_MODESET_MODULE_PARAMS=${flat_params[2]}" \
        "NVIDIA_PEERMEM_MODULE_PARAMS=${flat_params[3]}"
    # printf terminated every line; drop the last newline so that appended
    # sections are each introduced by exactly one newline.
    config=${config%$'\n'}

    # Append the raw contents of each existing conf file.
    for i in "${!modules[@]}"; do
        conf_path="${conf_dir}/${modules[i]}.conf"
        if [ -f "${conf_path}" ]; then
            config+=$'\n'"$(cat -- "${conf_path}")"
        fi
    done

    printf '%s\n' "${config}"
}
49+
50+
# Decide whether the DTK build can be skipped in favour of the userspace-only
# "fast path".
#
# The fast path is allowed only when the nvidia kernel module is loaded (its
# refcnt file exists), a stored configuration state file exists, and the
# freshly built configuration fingerprint is identical to the stored one.
# Any failure to build or read a fingerprint counts as "no".
#
# Arguments:
#   $1 - (optional) stored state file; defaults to
#        /run/nvidia/driver-config.state
#   $2 - (optional) module refcnt file; defaults to
#        /sys/module/nvidia/refcnt
# Returns:
#   0 when the fast path can be used, non-zero otherwise.
_should_use_fast_path() {
    local state_file="${1:-/run/nvidia/driver-config.state}"
    local refcnt_file="${2:-/sys/module/nvidia/refcnt}"
    local current_config stored_config

    [ -f "${refcnt_file}" ] || return 1
    [ -f "${state_file}" ] || return 1

    # Declared separately from the assignment so a failing command is not
    # masked by the exit status of `local`.
    current_config=$(_build_driver_config) || return 1
    stored_config=$(cat -- "${state_file}" 2>/dev/null) || return 1

    # An empty fingerprint is never a valid match.
    [ -n "${current_config}" ] && [ "${current_config}" = "${stored_config}" ]
}
56+
1357
nv-ctr-run-with-dtk() {
1458
set -x
1559

@@ -18,6 +62,13 @@ nv-ctr-run-with-dtk() {
1862
exec bash -x nvidia-driver init
1963
fi
2064

65+
if _should_use_fast_path; then
66+
echo "Fast path detected: skipping DTK build and module copy, proceeding with userspace-only install"
67+
exec bash -x nvidia-driver init
68+
fi
69+
70+
echo "Fast path not detected: building driver and modules"
71+
2172
if [[ ! -f "$DRIVER_TOOLKIT_SHARED_DIR/dir_prepared" ]]; then
2273
cp -r \
2374
/tmp/install.sh \

ubuntu22.04/nvidia-driver

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,7 @@ _start_vgpu_topology_daemon() {
639639
nvidia-topologyd
640640
}
641641

642-
_ensure_persistence_running() {
642+
_ensure_persistenced() {
643643
local pid_file=/var/run/nvidia-persistenced/nvidia-persistenced.pid pid
644644
if pid=$(<"${pid_file}" 2>/dev/null) && [ -n "${pid}" ] && kill -0 "${pid}" 2>/dev/null; then
645645
return 0
@@ -653,11 +653,31 @@ _ensure_persistence_running() {
653653
}
654654

655655
_build_driver_config() {
656+
local nvidia_params="" nvidia_uvm_params="" nvidia_modeset_params="" nvidia_peermem_params=""
657+
658+
# Read module parameters from conf files
659+
if [ -f "/drivers/nvidia.conf" ]; then
660+
nvidia_params=$(cat "/drivers/nvidia.conf" | tr '\n' ' ')
661+
fi
662+
if [ -f "/drivers/nvidia-uvm.conf" ]; then
663+
nvidia_uvm_params=$(cat "/drivers/nvidia-uvm.conf" | tr '\n' ' ')
664+
fi
665+
if [ -f "/drivers/nvidia-modeset.conf" ]; then
666+
nvidia_modeset_params=$(cat "/drivers/nvidia-modeset.conf" | tr '\n' ' ')
667+
fi
668+
if [ -f "/drivers/nvidia-peermem.conf" ]; then
669+
nvidia_peermem_params=$(cat "/drivers/nvidia-peermem.conf" | tr '\n' ' ')
670+
fi
671+
656672
local config="DRIVER_VERSION=${DRIVER_VERSION}
657673
KERNEL_VERSION=$(uname -r)
658674
GPU_DIRECT_RDMA_ENABLED=${GPU_DIRECT_RDMA_ENABLED}
659675
USE_HOST_MOFED=${USE_HOST_MOFED}
660-
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE}"
676+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE}
677+
NVIDIA_MODULE_PARAMS=${nvidia_params}
678+
NVIDIA_UVM_MODULE_PARAMS=${nvidia_uvm_params}
679+
NVIDIA_MODESET_MODULE_PARAMS=${nvidia_modeset_params}
680+
NVIDIA_PEERMEM_MODULE_PARAMS=${nvidia_peermem_params}"
661681

662682
# Append config file contents directly
663683
for conf_file in nvidia.conf nvidia-uvm.conf nvidia-modeset.conf nvidia-peermem.conf; do
@@ -741,7 +761,7 @@ init() {
741761
_mount_rootfs
742762

743763
# Ensure persistence daemon is running
744-
_ensure_persistence_running
764+
_ensure_persistenced
745765

746766
# Write kernel update hook
747767
_write_kernel_update_hook

0 commit comments

Comments
 (0)