Skip to content

Commit 16a6d90

Browse files
Add DRIVER_VERSION and KERNEL_MODULE_TYPE env vars to k8s-driver-manager init container for ClusterPolicy controller
Signed-off-by: Karthik Vetrivel <[email protected]>
1 parent 12a3616 commit 16a6d90

File tree

2 files changed

+53
-9
lines changed

2 files changed

+53
-9
lines changed

controllers/object_controls.go

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,8 @@ const (
160160
OpenKernelModulesEnabledEnvName = "OPEN_KERNEL_MODULES_ENABLED"
161161
// KernelModuleTypeEnvName is the name of the driver-container envvar to set the desired kernel module type
162162
KernelModuleTypeEnvName = "KERNEL_MODULE_TYPE"
163+
// DriverVersionEnvName is the name of the envvar to set the desired driver version
164+
DriverVersionEnvName = "DRIVER_VERSION"
163165
// MPSRootEnvName is the name of the envvar for configuring the MPS root
164166
MPSRootEnvName = "MPS_ROOT"
165167
// DefaultMPSRoot is the default MPS root path on the host
@@ -1002,7 +1004,7 @@ func TransformDriver(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n C
10021004
}
10031005

10041006
// update driver-manager initContainer
1005-
err = transformDriverManagerInitContainer(obj, &config.Driver.Manager, config.Driver.GPUDirectRDMA)
1007+
err = transformDriverManagerInitContainer(obj, &config.Driver.Manager, config.Driver.GPUDirectRDMA, config.Driver.Version, config.Driver.KernelModuleType)
10061008
if err != nil {
10071009
return err
10081010
}
@@ -1050,7 +1052,7 @@ func TransformDriver(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n C
10501052
// TransformVGPUManager transforms NVIDIA vGPU Manager daemonset with required config as per ClusterPolicy
10511053
func TransformVGPUManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error {
10521054
// update k8s-driver-manager initContainer
1053-
err := transformDriverManagerInitContainer(obj, &config.VGPUManager.DriverManager, nil)
1055+
err := transformDriverManagerInitContainer(obj, &config.VGPUManager.DriverManager, nil, config.VGPUManager.Version, "")
10541056
if err != nil {
10551057
return fmt.Errorf("failed to transform k8s-driver-manager initContainer for vGPU Manager: %v", err)
10561058
}
@@ -2009,7 +2011,7 @@ func TransformKataManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec
20092011
// TransformVFIOManager transforms VFIO-PCI Manager daemonset with required config as per ClusterPolicy
20102012
func TransformVFIOManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error {
20112013
// update k8s-driver-manager initContainer
2012-
err := transformDriverManagerInitContainer(obj, &config.VFIOManager.DriverManager, nil)
2014+
err := transformDriverManagerInitContainer(obj, &config.VFIOManager.DriverManager, nil, config.VFIOManager.Version, "")
20132015
if err != nil {
20142016
return fmt.Errorf("failed to transform k8s-driver-manager initContainer for VFIO Manager: %v", err)
20152017
}
@@ -2741,7 +2743,7 @@ func transformConfigManagerSidecarContainer(obj *appsv1.DaemonSet, config *gpuv1
27412743
return nil
27422744
}
27432745

2744-
func transformDriverManagerInitContainer(obj *appsv1.DaemonSet, driverManagerSpec *gpuv1.DriverManagerSpec, rdmaSpec *gpuv1.GPUDirectRDMASpec) error {
2746+
func transformDriverManagerInitContainer(obj *appsv1.DaemonSet, driverManagerSpec *gpuv1.DriverManagerSpec, rdmaSpec *gpuv1.GPUDirectRDMASpec, driverVersion string, kernelModuleType string) error {
27452747
container := findContainerByName(obj.Spec.Template.Spec.InitContainers, "k8s-driver-manager")
27462748

27472749
if container == nil {
@@ -2765,6 +2767,16 @@ func transformDriverManagerInitContainer(obj *appsv1.DaemonSet, driverManagerSpe
27652767
}
27662768
}
27672769

2770+
// set driver version for config change detection
2771+
if driverVersion != "" {
2772+
setContainerEnv(container, DriverVersionEnvName, driverVersion)
2773+
}
2774+
2775+
// set kernel module type for config change detection
2776+
if kernelModuleType != "" {
2777+
setContainerEnv(container, KernelModuleTypeEnvName, kernelModuleType)
2778+
}
2779+
27682780
// set/append environment variables for driver-manager initContainer
27692781
if len(driverManagerSpec.Env) > 0 {
27702782
for _, env := range driverManagerSpec.Env {

controllers/transforms_test.go

Lines changed: 37 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1937,10 +1937,12 @@ func newBoolPtr(b bool) *bool {
19371937

19381938
func TestTransformDriverManagerInitContainer(t *testing.T) {
19391939
testCases := []struct {
1940-
description string
1941-
ds Daemonset
1942-
cpSpec *gpuv1.ClusterPolicySpec
1943-
expectedDs Daemonset
1940+
description string
1941+
ds Daemonset
1942+
cpSpec *gpuv1.ClusterPolicySpec
1943+
driverVersion string
1944+
kernelModuleType string
1945+
expectedDs Daemonset
19441946
}{
19451947
{
19461948
description: "transform k8s-driver-manager initContainer",
@@ -1963,6 +1965,8 @@ func TestTransformDriverManagerInitContainer(t *testing.T) {
19631965
},
19641966
},
19651967
},
1968+
driverVersion: "",
1969+
kernelModuleType: "",
19661970
expectedDs: NewDaemonset().WithInitContainer(corev1.Container{
19671971
Name: "k8s-driver-manager",
19681972
Image: "nvcr.io/nvidia/cloud-native/k8s-driver-manager:v1.0.0",
@@ -1974,11 +1978,39 @@ func TestTransformDriverManagerInitContainer(t *testing.T) {
19741978
},
19751979
}).WithInitContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret"),
19761980
},
1981+
{
1982+
description: "transform k8s-driver-manager initContainer with driver version and kernel module type",
1983+
ds: NewDaemonset().
1984+
WithInitContainer(corev1.Container{Name: "k8s-driver-manager"}).
1985+
WithInitContainer(corev1.Container{Name: "dummy"}),
1986+
cpSpec: &gpuv1.ClusterPolicySpec{
1987+
Driver: gpuv1.DriverSpec{
1988+
Manager: gpuv1.DriverManagerSpec{
1989+
Repository: "nvcr.io/nvidia/cloud-native",
1990+
Image: "k8s-driver-manager",
1991+
Version: "v1.0.0",
1992+
ImagePullPolicy: "IfNotPresent",
1993+
ImagePullSecrets: []string{"pull-secret"},
1994+
},
1995+
},
1996+
},
1997+
driverVersion: "550.90.12",
1998+
kernelModuleType: "open",
1999+
expectedDs: NewDaemonset().WithInitContainer(corev1.Container{
2000+
Name: "k8s-driver-manager",
2001+
Image: "nvcr.io/nvidia/cloud-native/k8s-driver-manager:v1.0.0",
2002+
ImagePullPolicy: corev1.PullIfNotPresent,
2003+
Env: []corev1.EnvVar{
2004+
{Name: DriverVersionEnvName, Value: "550.90.12"},
2005+
{Name: KernelModuleTypeEnvName, Value: "open"},
2006+
},
2007+
}).WithInitContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret"),
2008+
},
19772009
}
19782010

19792011
for _, tc := range testCases {
19802012
t.Run(tc.description, func(t *testing.T) {
1981-
err := transformDriverManagerInitContainer(tc.ds.DaemonSet, &tc.cpSpec.Driver.Manager, tc.cpSpec.Driver.GPUDirectRDMA)
2013+
err := transformDriverManagerInitContainer(tc.ds.DaemonSet, &tc.cpSpec.Driver.Manager, tc.cpSpec.Driver.GPUDirectRDMA, tc.driverVersion, tc.kernelModuleType)
19822014
require.NoError(t, err)
19832015
require.EqualValues(t, tc.expectedDs, tc.ds)
19842016
})

0 commit comments

Comments
 (0)