From 1434b93b2952fbbe8fe04f7b2226c5e54cc11758 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Sat, 31 May 2025 14:38:58 +0200 Subject: [PATCH] Add label app.kubernetes.io/name:nvidia-dra-driver-gpu to all project components Signed-off-by: Carlos Eduardo Arango Gutierrez --- cmd/compute-domain-controller/controller.go | 5 +++++ cmd/compute-domain-controller/daemonset.go | 2 ++ cmd/compute-domain-controller/main.go | 9 +++++++++ cmd/compute-domain-controller/resourceclaimtemplate.go | 3 +++ cmd/gpu-kubelet-plugin/main.go | 9 +++++++++ cmd/gpu-kubelet-plugin/sharing.go | 2 ++ .../helm/nvidia-dra-driver-gpu/templates/controller.yaml | 2 ++ .../nvidia-dra-driver-gpu/templates/kubeletplugin.yaml | 2 ++ templates/compute-domain-daemon-claim-template.tmpl.yaml | 1 + templates/compute-domain-daemon.tmpl.yaml | 1 + .../compute-domain-workload-claim-template.tmpl.yaml | 1 + templates/mps-control-daemon.tmpl.yaml | 7 ++++--- 12 files changed, 41 insertions(+), 3 deletions(-) diff --git a/cmd/compute-domain-controller/controller.go b/cmd/compute-domain-controller/controller.go index d37c39417..30f51749b 100644 --- a/cmd/compute-domain-controller/controller.go +++ b/cmd/compute-domain-controller/controller.go @@ -29,6 +29,10 @@ import ( // It contains essential fields for driver identification, Kubernetes client access, // and work queue management. type ManagerConfig struct { + // appName is the app name to use for the app.kubernetes.io/name label value + // to be used on manager pods + appName string + // driverName is the unique identifier for this DRA driver driverName string @@ -63,6 +67,7 @@ func (c *Controller) Run(ctx context.Context) error { workQueue := workqueue.New(workqueue.DefaultControllerRateLimiter()) managerConfig := &ManagerConfig{ + appName: c.config.flags.appName, driverName: c.config.driverName, driverNamespace: c.config.flags.namespace, imageName: c.config.flags.imageName, diff --git a/cmd/compute-domain-controller/daemonset.go b/cmd/compute-domain-controller/daemonset.go index 3fac86528..135b6ec7c 100644 --- a/cmd/compute-domain-controller/daemonset.go +++ b/cmd/compute-domain-controller/daemonset.go @@ -47,6 +47,7 @@ type DaemonSetTemplateData struct { Finalizer string ComputeDomainLabelKey string ComputeDomainLabelValue types.UID + AppLabelValue string ResourceClaimTemplateName string ImageName string } @@ -180,6 +181,7 @@ func (m *DaemonSetManager) Create(ctx context.Context, namespace string, cd *nva Finalizer: computeDomainFinalizer, ComputeDomainLabelKey: computeDomainLabelKey, ComputeDomainLabelValue: cd.UID, + AppLabelValue: m.config.appName, ResourceClaimTemplateName: rct.Name, ImageName: m.config.imageName, } diff --git a/cmd/compute-domain-controller/main.go b/cmd/compute-domain-controller/main.go index 28fda45ea..4b43a1f23 100644 --- a/cmd/compute-domain-controller/main.go +++ b/cmd/compute-domain-controller/main.go @@ -53,6 +53,7 @@ type Flags struct { podName string namespace string imageName string + appName string httpEndpoint string metricsPath string @@ -99,6 +100,14 @@ func newApp() *cli.App { Destination: &flags.imageName, EnvVars: []string{"IMAGE_NAME"}, }, + &cli.StringFlag{ + Name: "chart-name", + Usage: "The Helm chart name to use for the app label value.", + Required: true, + Destination: &flags.appName, + Value: "nvidia-dra-driver-gpu", + EnvVars: []string{"HELM_CHART_NAME"}, + }, &cli.StringFlag{ Category: "HTTP server:", Name: "http-endpoint", diff --git a/cmd/compute-domain-controller/resourceclaimtemplate.go b/cmd/compute-domain-controller/resourceclaimtemplate.go index 5a06598c4..f89588656 100644 --- a/cmd/compute-domain-controller/resourceclaimtemplate.go +++ b/cmd/compute-domain-controller/resourceclaimtemplate.go @@ -49,6 +49,7 @@ type ResourceClaimTemplateTemplateData struct { Finalizer string ComputeDomainLabelKey string ComputeDomainLabelValue types.UID + AppLabelValue string TargetLabelKey string TargetLabelValue string DeviceClassName string @@ -298,6 +299,7 @@ func (m *DaemonSetResourceClaimTemplateManager) Create(ctx context.Context, name Finalizer: computeDomainFinalizer, ComputeDomainLabelKey: computeDomainLabelKey, ComputeDomainLabelValue: cd.UID, + AppLabelValue: m.config.appName, TargetLabelKey: computeDomainResourceClaimTemplateTargetLabelKey, TargetLabelValue: computeDomainResourceClaimTemplateTargetDaemon, DeviceClassName: computeDomainDaemonDeviceClass, @@ -356,6 +358,7 @@ func (m *WorkloadResourceClaimTemplateManager) Create(ctx context.Context, names Namespace: namespace, Name: name, Finalizer: computeDomainFinalizer, + AppLabelValue: m.config.appName, ComputeDomainLabelKey: computeDomainLabelKey, ComputeDomainLabelValue: cd.UID, TargetLabelKey: computeDomainResourceClaimTemplateTargetLabelKey, diff --git a/cmd/gpu-kubelet-plugin/main.go b/cmd/gpu-kubelet-plugin/main.go index 8c79b501e..1a893cc21 100644 --- a/cmd/gpu-kubelet-plugin/main.go +++ b/cmd/gpu-kubelet-plugin/main.go @@ -48,6 +48,7 @@ type Flags struct { hostDriverRoot string nvidiaCTKPath string imageName string + appName string } type Config struct { @@ -117,6 +118,14 @@ func newApp() *cli.App { Destination: &flags.imageName, EnvVars: []string{"IMAGE_NAME"}, }, + &cli.StringFlag{ + Name: "app-name", + Usage: "The app name to use for the app.kubernetes.io/name label value.", + Required: true, + Destination: &flags.appName, + Value: "nvidia-dra-driver-gpu", + EnvVars: []string{"HELM_CHART_NAME"}, + }, } cliFlags = append(cliFlags, flags.kubeClientConfig.Flags()...) cliFlags = append(cliFlags, flags.loggingConfig.Flags()...) diff --git a/cmd/gpu-kubelet-plugin/sharing.go b/cmd/gpu-kubelet-plugin/sharing.go index b71c53d2f..75c437f2e 100644 --- a/cmd/gpu-kubelet-plugin/sharing.go +++ b/cmd/gpu-kubelet-plugin/sharing.go @@ -85,6 +85,7 @@ type MpsControlDaemonTemplateData struct { NodeName string MpsControlDaemonNamespace string MpsControlDaemonName string + AppLabelValue string CUDA_VISIBLE_DEVICES string //nolint:stylecheck DefaultActiveThreadPercentage string DefaultPinnedDeviceMemoryLimits map[string]string @@ -200,6 +201,7 @@ func (m *MpsControlDaemon) Start(ctx context.Context, config *configapi.MpsConfi NodeName: m.nodeName, MpsControlDaemonNamespace: m.namespace, MpsControlDaemonName: m.name, + AppLabelValue: m.manager.config.flags.appName, CUDA_VISIBLE_DEVICES: strings.Join(deviceUUIDs, ","), DefaultActiveThreadPercentage: "", DefaultPinnedDeviceMemoryLimits: nil, diff --git a/deployments/helm/nvidia-dra-driver-gpu/templates/controller.yaml b/deployments/helm/nvidia-dra-driver-gpu/templates/controller.yaml index c307fec0a..d987bdd1c 100644 --- a/deployments/helm/nvidia-dra-driver-gpu/templates/controller.yaml +++ b/deployments/helm/nvidia-dra-driver-gpu/templates/controller.yaml @@ -66,6 +66,8 @@ spec: fieldPath: metadata.namespace - name: IMAGE_NAME value: {{ include "nvidia-dra-driver-gpu.fullimage" . }} + - name: HELM_CHART_NAME + value: {{ .Chart.Name }} {{- with .Values.controller.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} diff --git a/deployments/helm/nvidia-dra-driver-gpu/templates/kubeletplugin.yaml b/deployments/helm/nvidia-dra-driver-gpu/templates/kubeletplugin.yaml index 954b2bd72..199214c6d 100644 --- a/deployments/helm/nvidia-dra-driver-gpu/templates/kubeletplugin.yaml +++ b/deployments/helm/nvidia-dra-driver-gpu/templates/kubeletplugin.yaml @@ -72,6 +72,8 @@ spec: resources: {{- toYaml .Values.kubeletPlugin.containers.computeDomains.resources | nindent 10 }} env: + - name: HELM_CHART_NAME + value: {{ .Chart.Name }} - name: MASK_NVIDIA_DRIVER_PARAMS value: "{{ .Values.maskNvidiaDriverParams }}" - name: NVIDIA_CTK_PATH diff --git a/templates/compute-domain-daemon-claim-template.tmpl.yaml b/templates/compute-domain-daemon-claim-template.tmpl.yaml index 3aef68168..7b4723c08 100644 --- a/templates/compute-domain-daemon-claim-template.tmpl.yaml +++ b/templates/compute-domain-daemon-claim-template.tmpl.yaml @@ -7,6 +7,7 @@ metadata: finalizers: - {{ .Finalizer }} labels: + app.kubernetes.io/name: {{ .AppLabelValue }} {{ .ComputeDomainLabelKey }}: {{ .ComputeDomainLabelValue }} {{ .TargetLabelKey }}: {{ .TargetLabelValue }} spec: diff --git a/templates/compute-domain-daemon.tmpl.yaml b/templates/compute-domain-daemon.tmpl.yaml index 6b65cd43d..285508029 100644 --- a/templates/compute-domain-daemon.tmpl.yaml +++ b/templates/compute-domain-daemon.tmpl.yaml @@ -7,6 +7,7 @@ metadata: finalizers: - {{ .Finalizer }} labels: + app.kubernetes.io/name: {{ .AppLabelValue }} {{ .ComputeDomainLabelKey }}: {{ .ComputeDomainLabelValue }} spec: selector: diff --git a/templates/compute-domain-workload-claim-template.tmpl.yaml b/templates/compute-domain-workload-claim-template.tmpl.yaml index 5b9718dfc..73f1d6df0 100644 --- a/templates/compute-domain-workload-claim-template.tmpl.yaml +++ b/templates/compute-domain-workload-claim-template.tmpl.yaml @@ -7,6 +7,7 @@ metadata: finalizers: - {{ .Finalizer }} labels: + app.kubernetes.io/name: {{ .AppLabelValue }} {{ .ComputeDomainLabelKey }}: {{ .ComputeDomainLabelValue }} {{ .TargetLabelKey }}: {{ .TargetLabelValue }} spec: diff --git a/templates/mps-control-daemon.tmpl.yaml b/templates/mps-control-daemon.tmpl.yaml index 4d835fde0..ee27ced37 100644 --- a/templates/mps-control-daemon.tmpl.yaml +++ b/templates/mps-control-daemon.tmpl.yaml @@ -5,16 +5,17 @@ metadata: namespace: {{ .MpsControlDaemonNamespace }} name: {{ .MpsControlDaemonName }} labels: - app: {{ .MpsControlDaemonName }} + app.kubernetes.io/name: {{ .AppLabelValue }} + component: {{ .MpsControlDaemonName }} spec: replicas: 1 selector: matchLabels: - app: {{ .MpsControlDaemonName }} + component: {{ .MpsControlDaemonName }} template: metadata: labels: - app: {{ .MpsControlDaemonName }} + component: {{ .MpsControlDaemonName }} spec: nodeName: {{ .NodeName }} hostPID: true