Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmd/compute-domain-controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ import (
// It contains essential fields for driver identification, Kubernetes client access,
// and work queue management.
type ManagerConfig struct {
// appName is the value of the app.kubernetes.io/name label set on the
// objects created by the managers.
appName string

// driverName is the unique identifier for this DRA driver
driverName string

Expand Down Expand Up @@ -63,6 +67,7 @@ func (c *Controller) Run(ctx context.Context) error {
workQueue := workqueue.New(workqueue.DefaultControllerRateLimiter())

managerConfig := &ManagerConfig{
appName: c.config.flags.appName,
driverName: c.config.driverName,
driverNamespace: c.config.flags.namespace,
imageName: c.config.flags.imageName,
Expand Down
2 changes: 2 additions & 0 deletions cmd/compute-domain-controller/daemonset.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ type DaemonSetTemplateData struct {
Finalizer string
ComputeDomainLabelKey string
ComputeDomainLabelValue types.UID
AppLabelValue string
ResourceClaimTemplateName string
ImageName string
}
Expand Down Expand Up @@ -180,6 +181,7 @@ func (m *DaemonSetManager) Create(ctx context.Context, namespace string, cd *nva
Finalizer: computeDomainFinalizer,
ComputeDomainLabelKey: computeDomainLabelKey,
ComputeDomainLabelValue: cd.UID,
AppLabelValue: m.config.appName,
ResourceClaimTemplateName: rct.Name,
ImageName: m.config.imageName,
}
Expand Down
9 changes: 9 additions & 0 deletions cmd/compute-domain-controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ type Flags struct {
podName string
namespace string
imageName string
appName string

httpEndpoint string
metricsPath string
Expand Down Expand Up @@ -99,6 +100,14 @@ func newApp() *cli.App {
Destination: &flags.imageName,
EnvVars: []string{"IMAGE_NAME"},
},
&cli.StringFlag{
Name: "chart-name",
Usage: "The Helm chart name to use for the app label value.",
Required: true,
Destination: &flags.appName,
Value: "nvidia-dra-driver-gpu",
EnvVars: []string{"HELM_CHART_NAME"},
},
&cli.StringFlag{
Category: "HTTP server:",
Name: "http-endpoint",
Expand Down
3 changes: 3 additions & 0 deletions cmd/compute-domain-controller/resourceclaimtemplate.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ type ResourceClaimTemplateTemplateData struct {
Finalizer string
ComputeDomainLabelKey string
ComputeDomainLabelValue types.UID
AppLabelValue string
TargetLabelKey string
TargetLabelValue string
DeviceClassName string
Expand Down Expand Up @@ -298,6 +299,7 @@ func (m *DaemonSetResourceClaimTemplateManager) Create(ctx context.Context, name
Finalizer: computeDomainFinalizer,
ComputeDomainLabelKey: computeDomainLabelKey,
ComputeDomainLabelValue: cd.UID,
AppLabelValue: m.config.appName,
TargetLabelKey: computeDomainResourceClaimTemplateTargetLabelKey,
TargetLabelValue: computeDomainResourceClaimTemplateTargetDaemon,
DeviceClassName: computeDomainDaemonDeviceClass,
Expand Down Expand Up @@ -356,6 +358,7 @@ func (m *WorkloadResourceClaimTemplateManager) Create(ctx context.Context, names
Namespace: namespace,
Name: name,
Finalizer: computeDomainFinalizer,
AppLabelValue: m.config.appName,
ComputeDomainLabelKey: computeDomainLabelKey,
ComputeDomainLabelValue: cd.UID,
TargetLabelKey: computeDomainResourceClaimTemplateTargetLabelKey,
Expand Down
9 changes: 9 additions & 0 deletions cmd/gpu-kubelet-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ type Flags struct {
hostDriverRoot string
nvidiaCTKPath string
imageName string
appName string
}

type Config struct {
Expand Down Expand Up @@ -117,6 +118,14 @@ func newApp() *cli.App {
Destination: &flags.imageName,
EnvVars: []string{"IMAGE_NAME"},
},
&cli.StringFlag{
Name: "app-name",
Usage: "The app name to use for the app.kubernetes.io/name label value.",
Required: true,
Destination: &flags.appName,
Value: "nvidia-dra-driver-gpu",
EnvVars: []string{"HELM_CHART_NAME"},
},
}
cliFlags = append(cliFlags, flags.kubeClientConfig.Flags()...)
cliFlags = append(cliFlags, flags.loggingConfig.Flags()...)
Expand Down
2 changes: 2 additions & 0 deletions cmd/gpu-kubelet-plugin/sharing.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ type MpsControlDaemonTemplateData struct {
NodeName string
MpsControlDaemonNamespace string
MpsControlDaemonName string
AppLabelValue string
CUDA_VISIBLE_DEVICES string //nolint:stylecheck
DefaultActiveThreadPercentage string
DefaultPinnedDeviceMemoryLimits map[string]string
Expand Down Expand Up @@ -200,6 +201,7 @@ func (m *MpsControlDaemon) Start(ctx context.Context, config *configapi.MpsConfi
NodeName: m.nodeName,
MpsControlDaemonNamespace: m.namespace,
MpsControlDaemonName: m.name,
AppLabelValue: m.manager.config.flags.appName,
CUDA_VISIBLE_DEVICES: strings.Join(deviceUUIDs, ","),
DefaultActiveThreadPercentage: "",
DefaultPinnedDeviceMemoryLimits: nil,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ spec:
fieldPath: metadata.namespace
- name: IMAGE_NAME
value: {{ include "nvidia-dra-driver-gpu.fullimage" . }}
- name: HELM_CHART_NAME
value: {{ .Chart.Name }}
{{- with .Values.controller.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ spec:
resources:
{{- toYaml .Values.kubeletPlugin.containers.computeDomains.resources | nindent 10 }}
env:
- name: HELM_CHART_NAME
value: {{ .Chart.Name }}
- name: MASK_NVIDIA_DRIVER_PARAMS
value: "{{ .Values.maskNvidiaDriverParams }}"
- name: NVIDIA_CTK_PATH
Expand Down
1 change: 1 addition & 0 deletions templates/compute-domain-daemon-claim-template.tmpl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ metadata:
finalizers:
- {{ .Finalizer }}
labels:
app.kubernetes.io/name: {{ .AppLabelValue }}
{{ .ComputeDomainLabelKey }}: {{ .ComputeDomainLabelValue }}
{{ .TargetLabelKey }}: {{ .TargetLabelValue }}
spec:
Expand Down
1 change: 1 addition & 0 deletions templates/compute-domain-daemon.tmpl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ metadata:
finalizers:
- {{ .Finalizer }}
labels:
app.kubernetes.io/name: {{ .AppLabelValue }}
{{ .ComputeDomainLabelKey }}: {{ .ComputeDomainLabelValue }}
spec:
selector:
Expand Down
1 change: 1 addition & 0 deletions templates/compute-domain-workload-claim-template.tmpl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ metadata:
finalizers:
- {{ .Finalizer }}
labels:
app.kubernetes.io/name: {{ .AppLabelValue }}
{{ .ComputeDomainLabelKey }}: {{ .ComputeDomainLabelValue }}
{{ .TargetLabelKey }}: {{ .TargetLabelValue }}
spec:
Expand Down
7 changes: 4 additions & 3 deletions templates/mps-control-daemon.tmpl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@ metadata:
namespace: {{ .MpsControlDaemonNamespace }}
name: {{ .MpsControlDaemonName }}
labels:
app: {{ .MpsControlDaemonName }}
app.kubernetes.io/name: {{ .AppLabelValue }}
component: {{ .MpsControlDaemonName }}
spec:
replicas: 1
selector:
matchLabels:
app: {{ .MpsControlDaemonName }}
component: {{ .MpsControlDaemonName }}
template:
metadata:
labels:
app: {{ .MpsControlDaemonName }}
component: {{ .MpsControlDaemonName }}
spec:
nodeName: {{ .NodeName }}
hostPID: true
Expand Down
Loading