1919package main
2020
2121import (
22- "bytes"
2322 "context"
2423 "errors"
2524 "fmt"
@@ -43,28 +42,21 @@ import (
const (
	// driverRoot is the directory prefix under which the per-module
	// parameter files declared at the bottom of this block are resolved.
	driverRoot = "/run/nvidia/driver"
	// driverPIDFile is the path of the driver PID file.
	driverPIDFile = "/run/nvidia/nvidia-driver.pid"
	// driverConfigStateFile is the saved driver configuration that
	// shouldUpdateDriverConfig compares against the freshly built one.
	driverConfigStateFile = "/run/nvidia/nvidia-driver.state"

	operatorNamespace = "gpu-operator"
	pausedStr         = "paused-for-driver-upgrade"

	// defaultDrainTimeout is the zero duration.
	defaultDrainTimeout = time.Second * 0
	defaultGracePeriod  = 5 * time.Minute

	// nvidiaDomainPrefix is the prefix joined with "/" to form the
	// nvidia.com/... deploy label names.
	nvidiaDomainPrefix = "nvidia.com"

	// Kernel module parameter files, one per NVIDIA module, all located
	// beneath driverRoot. buildCurrentConfig reads them via readModuleParams.
	nvidiaModuleConfigFile        = driverRoot + "/drivers/nvidia.conf"
	nvidiaUVMModuleConfigFile     = driverRoot + "/drivers/nvidia-uvm.conf"
	nvidiaModesetModuleConfigFile = driverRoot + "/drivers/nvidia-modeset.conf"
	nvidiaPeermemModuleConfigFile = driverRoot + "/drivers/nvidia-peermem.conf"
)
5958
6059var (
61- driverConfigFiles = []string {
62- nvidiaModuleConfigFile ,
63- nvidiaUVMModuleConfigFile ,
64- nvidiaModsetModuleConfigFile ,
65- nvidiaPeermemModuleConfigFile ,
66- }
67-
6860 nvidiaDriverDeployLabel = nvidiaDomainPrefix + "/" + "gpu.deploy.driver"
6961 nvidiaOperatorValidatorDeployLabel = nvidiaDomainPrefix + "/" + "gpu.deploy.operator-validator"
7062 nvidiaContainerToolkitDeployLabel = nvidiaDomainPrefix + "/" + "gpu.deploy.container-toolkit"
@@ -681,68 +673,52 @@ func (dm *DriverManager) isDriverLoaded() bool {
681673 return err == nil
682674}
683675
684- // getValueWithOverride extracts a value from config by key, but returns override if non-empty
685- func getValueWithOverride (config , key , override string ) string {
686- if override != "" {
687- return override
688- }
689- for _ , line := range strings .Split (config , "\n " ) {
690- if strings .HasPrefix (line , key + "=" ) {
691- return strings .TrimPrefix (line , key + "=" )
692- }
693- }
694- return ""
695- }
696-
697676// getKernelVersion returns the current kernel version
698677func getKernelVersion () string {
699678 var utsname unix.Utsname
700679 if err := unix .Uname (& utsname ); err != nil {
701680 return ""
702681 }
703682
704- release := utsname .Release [:]
705- nullIdx := bytes .IndexByte (release , 0 )
706- return string (release [:nullIdx ])
683+ return strings .Trim (string (utsname .Release [:]), " \r \n \t \u0000 \uffff " )
707684}
708685
709686// buildCurrentConfig constructs the current driver configuration string
710- func (dm * DriverManager ) buildCurrentConfig (storedConfig string ) string {
711- driverVersion := getValueWithOverride (storedConfig , "DRIVER_VERSION" , dm .config .driverVersion )
712- kernelVersion := getValueWithOverride (storedConfig , "KERNEL_VERSION" , getKernelVersion ())
713- kernelModuleType := getValueWithOverride (storedConfig , "KERNEL_MODULE_TYPE" , os .Getenv ("KERNEL_MODULE_TYPE" ))
714- driverTypeEnv := os .Getenv ("DRIVER_TYPE" )
715- if driverTypeEnv == "" {
716- driverTypeEnv = "passthrough"
687+ func (dm * DriverManager ) buildCurrentConfig () string {
688+ driverType := os .Getenv ("DRIVER_TYPE" )
689+ if driverType == "" {
690+ driverType = "passthrough"
717691 }
718- driverType := getValueWithOverride (storedConfig , "DRIVER_TYPE" , driverTypeEnv )
719692
720693 // Read module parameters from conf files
721694 nvidiaParams := readModuleParams (nvidiaModuleConfigFile )
722695 nvidiaUVMParams := readModuleParams (nvidiaUVMModuleConfigFile )
723- nvidiaModeset := readModuleParams (nvidiaModsetModuleConfigFile )
696+ nvidiaModeset := readModuleParams (nvidiaModesetModuleConfigFile )
724697 nvidiaPeermem := readModuleParams (nvidiaPeermemModuleConfigFile )
725698
726- var config strings.Builder
727- config .WriteString (fmt .Sprintf ("DRIVER_VERSION=%s\n " , driverVersion ))
728- config .WriteString (fmt .Sprintf ("DRIVER_TYPE=%s\n " , driverType ))
729- config .WriteString (fmt .Sprintf ("KERNEL_VERSION=%s\n " , kernelVersion ))
730- config .WriteString (fmt .Sprintf ("GPU_DIRECT_RDMA_ENABLED=%v\n " , dm .config .gpuDirectRDMAEnabled ))
731- config .WriteString (fmt .Sprintf ("USE_HOST_MOFED=%v\n " , dm .config .useHostMofed ))
732- config .WriteString (fmt .Sprintf ("KERNEL_MODULE_TYPE=%s\n " , kernelModuleType ))
733- config .WriteString (fmt .Sprintf ("NVIDIA_MODULE_PARAMS=%s\n " , nvidiaParams ))
734- config .WriteString (fmt .Sprintf ("NVIDIA_UVM_MODULE_PARAMS=%s\n " , nvidiaUVMParams ))
735- config .WriteString (fmt .Sprintf ("NVIDIA_MODESET_MODULE_PARAMS=%s\n " , nvidiaModeset ))
736- config .WriteString (fmt .Sprintf ("NVIDIA_PEERMEM_MODULE_PARAMS=%s\n " , nvidiaPeermem ))
737-
738- // Append config file contents directly
739- for _ , file := range driverConfigFiles {
740- if data , err := os .ReadFile (file ); err == nil && len (data ) > 0 {
741- config .Write (data )
742- }
743- }
744-
745- return config .String ()
699+ configTemplate := `DRIVER_VERSION=%s
700+ DRIVER_TYPE=%s
701+ KERNEL_VERSION=%s
702+ GPU_DIRECT_RDMA_ENABLED=%v
703+ USE_HOST_MOFED=%v
704+ KERNEL_MODULE_TYPE=%s
705+ NVIDIA_MODULE_PARAMS=%s
706+ NVIDIA_UVM_MODULE_PARAMS=%s
707+ NVIDIA_MODESET_MODULE_PARAMS=%s
708+ NVIDIA_PEERMEM_MODULE_PARAMS=%s
709+ `
710+ return fmt .Sprintf (configTemplate ,
711+ dm .config .driverVersion ,
712+ driverType ,
713+ getKernelVersion (),
714+ dm .config .gpuDirectRDMAEnabled ,
715+ dm .config .useHostMofed ,
716+ os .Getenv ("KERNEL_MODULE_TYPE" ),
717+ nvidiaParams ,
718+ nvidiaUVMParams ,
719+ nvidiaModeset ,
720+ nvidiaPeermem ,
721+ )
746722}
747723
748724// readModuleParams reads a module parameter config file and returns its contents as a single-line space-separated string
@@ -751,26 +727,28 @@ func readModuleParams(filepath string) string {
751727 if err != nil {
752728 return ""
753729 }
754- // Convert newlines to spaces to match bash implementation
755- return strings .ReplaceAll (strings .TrimSpace (string (data )) , "\n " , " " )
730+ // Convert newlines to spaces and trim whitespace/null bytes
731+ return strings .Trim (strings .ReplaceAll (string (data ), "\n " , " " ), " \r \n \t \u0000 \uffff " )
756732}
757733
758- // driverModuleBuildNeeded checks if driver modules need to be rebuilt
759- func (dm * DriverManager ) driverModuleBuildNeeded () bool {
760- storedData , err := os .ReadFile (driverConfigStateFile )
734+ // shouldUpdateDriverConfig checks if the driver configuration needs to be updated
735+ func (dm * DriverManager ) shouldUpdateDriverConfig () bool {
736+ if ! dm .isDriverLoaded () {
737+ return true
738+ }
739+
740+ storedConfig , err := os .ReadFile (driverConfigStateFile )
761741 if err != nil {
762742 if os .IsNotExist (err ) {
763743 dm .log .Info ("No previous driver configuration found" )
764- return true
744+ } else {
745+ dm .log .Warnf ("Failed to read driver config state file: %v" , err )
765746 }
766- dm .log .Warnf ("Failed to read driver config state file: %v" , err )
767747 return true
768748 }
769749
770- storedConfig := string (storedData )
771- currentConfig := dm .buildCurrentConfig (storedConfig )
772-
773- return currentConfig != storedConfig
750+ currentConfig := dm .buildCurrentConfig ()
751+ return currentConfig != string (storedConfig )
774752}
775753
776754func (dm * DriverManager ) shouldSkipUninstall () bool {
@@ -779,8 +757,7 @@ func (dm *DriverManager) shouldSkipUninstall() bool {
779757 return false
780758 }
781759
782- // Only skip uninstall if driver IS loaded AND config matches (fast path optimization)
783- if dm .isDriverLoaded () && ! dm .driverModuleBuildNeeded () {
760+ if ! dm .shouldUpdateDriverConfig () {
784761 dm .log .Info ("Driver is loaded with matching config, enabling fast path" )
785762 return true
786763 }
0 commit comments