Disable GPU resource processor for nodes using DRA for accelerator attachment

mtrqq · mtrqq · commit 1acc8c203c25 · 2025-09-23T09:00:33.000Z
diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go
@@ -98,9 +98,22 @@ const (
 
 // GpuConfig contains the label, type and the resource name for a GPU.
 type GpuConfig struct {
-	Label        string
-	Type         string
-	ResourceName apiv1.ResourceName
+	// Label is the node label key used to identify GPU nodes.
+	Label string
+	// Type is the GPU type, as read from the node's GPU label value; may be empty.
+	Type string
+	// ExtendedResourceName is the extended resource under which the GPU is looked up
+	// in node capacity/allocatable. Providers may leave it empty when the GPU is exposed via DRA.
+	ExtendedResourceName apiv1.ResourceName
+	// DraDriverName is the name of the DRA driver exposing the GPU, or empty if DRA is not used.
+	DraDriverName string
+}
+
+// ExposedViaDra reports whether the GPU is exposed through a DRA driver, i.e.
+// whether DraDriverName is set. It is safe to call on a nil *GpuConfig, for
+// which it returns false.
+func (gpu *GpuConfig) ExposedViaDra() bool {
+	return gpu != nil && gpu.DraDriverName != ""
 }
 
 // CloudProvider contains configuration info and functions for interacting with
diff --git a/cluster-autoscaler/cloudprovider/gce/dynamicresources.go b/cluster-autoscaler/cloudprovider/gce/dynamicresources.go
@@ -0,0 +1,15 @@
+package gce
+
+import apiv1 "k8s.io/api/core/v1"
+
+const (
+	// DraGPUDriver is the name of the DRA driver used to expose NVIDIA GPU resources.
+	DraGPUDriver = "gpu.nvidia.com"
+	// DraGPULabel is the label added to nodes whose GPU resources are exposed via DRA.
+	DraGPULabel = "cloud.google.com/gke-gpu-dra-driver"
+)
+
+// GpuDraDriverEnabled reports whether the node carries the DraGPULabel label with the value "true".
+func GpuDraDriverEnabled(node *apiv1.Node) bool {
+	return node.Labels[DraGPULabel] == "true"
+}
diff --git a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
@@ -82,9 +82,24 @@ func (gce *GceCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
 }
 
 // GetNodeGpuConfig returns the label, type and resource name for the GPU added to node. If node doesn't have
-// any GPUs, it returns nil.
+// any GPUs, it returns nil. If the node's GPU is attached using DRA, the returned GpuConfig has
+// DraDriverName set and ExtendedResourceName cleared.
 func (gce *GceCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider.GpuConfig {
-	return gpu.GetNodeGPUFromCloudProvider(gce, node)
+	gpuConfig := gpu.GetNodeGPUFromCloudProvider(gce, node)
+	if gpuConfig == nil {
+		// No GPU detected on the node; there is no config to annotate, even if the DRA label is set.
+		return nil
+	}
+
+	// If GPU devices are exposed using DRA, the extended resource
+	// won't be present in the node allocatable or capacity,
+	// so we clear the extended resource name and record the DRA driver instead.
+	if GpuDraDriverEnabled(node) {
+		gpuConfig.DraDriverName = DraGPUDriver
+		gpuConfig.ExtendedResourceName = ""
+	}
+
+	return gpuConfig
 }
 
 // NodeGroups returns all node groups configured for this cloud provider.
diff --git a/cluster-autoscaler/cloudprovider/kwok/kwok_provider_test.go b/cluster-autoscaler/cloudprovider/kwok/kwok_provider_test.go
@@ -486,7 +486,7 @@ func TestGetNodeGpuConfig(t *testing.T) {
 	l := p.GetNodeGpuConfig(nodeWithGPU)
 	assert.NotNil(t, l)
 	assert.Equal(t, "k8s.amazonaws.com/accelerator", l.Label)
-	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ResourceName))
+	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ExtendedResourceName))
 	assert.Equal(t, "nvidia-tesla-k80", l.Type)
 
 	nodeWithNoAllocatableGPU := &apiv1.Node{
@@ -499,7 +499,7 @@ func TestGetNodeGpuConfig(t *testing.T) {
 	l = p.GetNodeGpuConfig(nodeWithNoAllocatableGPU)
 	assert.NotNil(t, l)
 	assert.Equal(t, "k8s.amazonaws.com/accelerator", l.Label)
-	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ResourceName))
+	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ExtendedResourceName))
 	assert.Equal(t, "nvidia-tesla-k80", l.Type)
 
 	nodeWithNoGPULabel := &apiv1.Node{
@@ -515,7 +515,7 @@ func TestGetNodeGpuConfig(t *testing.T) {
 	l = p.GetNodeGpuConfig(nodeWithNoGPULabel)
 	assert.NotNil(t, l)
 	assert.Equal(t, "k8s.amazonaws.com/accelerator", l.Label)
-	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ResourceName))
+	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ExtendedResourceName))
 	assert.Equal(t, "", l.Type)
 
 }
diff --git a/cluster-autoscaler/processors/customresources/gpu_processor.go b/cluster-autoscaler/processors/customresources/gpu_processor.go
@@ -42,13 +42,15 @@ func (p *GpuCustomResourcesProcessor) FilterOutNodesWithUnreadyResources(context
 	newReadyNodes := make([]*apiv1.Node, 0)
 	nodesWithUnreadyGpu := make(map[string]*apiv1.Node)
 	for _, node := range readyNodes {
+		// GPUs exposed via DRA do not show up in node allocatable, so the allocatable-based readiness check below does not apply.
+		if gpuExposedViaDra(context, node) {
+			newReadyNodes = append(newReadyNodes, node)
+			continue
+		}
 		_, hasGpuLabel := node.Labels[context.CloudProvider.GPULabel()]
 		gpuAllocatable, hasGpuAllocatable := node.Status.Allocatable[gpu.ResourceNvidiaGPU]
 		directXAllocatable, hasDirectXAllocatable := node.Status.Allocatable[gpu.ResourceDirectX]
-		// We expect node to have GPU based on label, but it doesn't show up
-		// on node object. Assume the node is still not fully started (installing
-		// GPU drivers).
 		if hasGpuLabel && ((!hasGpuAllocatable || gpuAllocatable.IsZero()) && (!hasDirectXAllocatable || directXAllocatable.IsZero())) {
 			klog.V(3).Infof("Overriding status of node %v, which seems to have unready GPU",
 				node.Name)
 			nodesWithUnreadyGpu[node.Name] = kubernetes.GetUnreadyNodeCopy(node, kubernetes.ResourceUnready)
@@ -70,13 +72,17 @@ func (p *GpuCustomResourcesProcessor) FilterOutNodesWithUnreadyResources(context
 // GetNodeResourceTargets returns mapping of resource names to their targets.
 // This includes resources which are not yet ready to use and visible in kubernetes.
 func (p *GpuCustomResourcesProcessor) GetNodeResourceTargets(context *context.AutoscalingContext, node *apiv1.Node, nodeGroup cloudprovider.NodeGroup) ([]CustomResourceTarget, errors.AutoscalerError) {
-	gpuTarget, err := p.GetNodeGpuTarget(context.CloudProvider.GPULabel(), node, nodeGroup)
+	if gpuExposedViaDra(context, node) {
+		return []CustomResourceTarget{}, nil
+	}
+
+	gpuTarget, err := p.getNodeGpuTarget(context.CloudProvider.GPULabel(), node, nodeGroup)
 	return []CustomResourceTarget{gpuTarget}, err
 }
 
-// GetNodeGpuTarget returns the gpu target of a given node. This includes gpus
+// getNodeGpuTarget returns the gpu target of a given node. This includes gpus
 // that are not ready to use and visible in kubernetes.
-func (p *GpuCustomResourcesProcessor) GetNodeGpuTarget(GPULabel string, node *apiv1.Node, nodeGroup cloudprovider.NodeGroup) (CustomResourceTarget, errors.AutoscalerError) {
+func (p *GpuCustomResourcesProcessor) getNodeGpuTarget(GPULabel string, node *apiv1.Node, nodeGroup cloudprovider.NodeGroup) (CustomResourceTarget, errors.AutoscalerError) {
 	gpuLabel, found := node.Labels[GPULabel]
 	if !found {
 		return CustomResourceTarget{}, nil
@@ -121,3 +127,15 @@ func (p *GpuCustomResourcesProcessor) GetNodeGpuTarget(GPULabel string, node *ap
 // CleanUp cleans up processor's internal structures.
 func (p *GpuCustomResourcesProcessor) CleanUp() {
 }
+
+// gpuExposedViaDra reports whether the cloud provider's GPU config for the node
+// is marked as exposed via DRA. It returns false when the provider returns no
+// GPU config for the node. Devices attached through DRA do not use node
+// allocatable to confirm their attachment, so such nodes are assumed ready here
+// and are expected to be checked in a separate processor.
+func gpuExposedViaDra(ctx *context.AutoscalingContext, node *apiv1.Node) bool {
+	cfg := ctx.CloudProvider.GetNodeGpuConfig(node)
+
+	// Guard against a nil config before asking whether it is DRA-backed.
+	return cfg != nil && cfg.ExposedViaDra()
+}
diff --git a/cluster-autoscaler/processors/customresources/gpu_processor_test.go b/cluster-autoscaler/processors/customresources/gpu_processor_test.go
@@ -25,6 +25,7 @@ import (
 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce"
 	testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
 	"k8s.io/autoscaler/cluster-autoscaler/context"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
@@ -152,13 +153,28 @@ func TestFilterOutNodesWithUnreadyResources(t *testing.T) {
 	}
 	expectedReadiness[nodeNoGpuUnready.Name] = false
 
+	nodeGPUReadyDra := &apiv1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "nodeGPUViaDra",
+			Labels: map[string]string{
+				gce.DraGPULabel: "true",
+			},
+			CreationTimestamp: metav1.NewTime(start),
+		},
+		Status: apiv1.NodeStatus{
+			Conditions: []apiv1.NodeCondition{readyCondition},
+		},
+	}
+	expectedReadiness[nodeGPUReadyDra.Name] = true
+
 	initialReadyNodes := []*apiv1.Node{
 		nodeGpuReady,
 		nodeGpuUnready,
 		nodeGpuUnready2,
 		nodeDirectXReady,
 		nodeDirectXUnready,
 		nodeNoGpuReady,
+		nodeGPUReadyDra,
 	}
 	initialAllNodes := []*apiv1.Node{
 		nodeGpuReady,
@@ -168,6 +184,7 @@ func TestFilterOutNodesWithUnreadyResources(t *testing.T) {
 		nodeDirectXUnready,
 		nodeNoGpuReady,
 		nodeNoGpuUnready,
+		nodeGPUReadyDra,
 	}
 
 	processor := GpuCustomResourcesProcessor{}
diff --git a/cluster-autoscaler/simulator/utilization/info.go b/cluster-autoscaler/simulator/utilization/info.go
@@ -49,14 +49,14 @@ type Info struct {
 // returns the individual cpu, memory and gpu utilization.
 func Calculate(nodeInfo *framework.NodeInfo, skipDaemonSetPods, skipMirrorPods, draEnabled bool, gpuConfig *cloudprovider.GpuConfig, currentTime time.Time) (utilInfo Info, err error) {
 	if gpuConfig != nil {
-		gpuUtil, err := CalculateUtilizationOfResource(nodeInfo, gpuConfig.ResourceName, skipDaemonSetPods, skipMirrorPods, currentTime)
+		gpuUtil, err := CalculateUtilizationOfResource(nodeInfo, gpuConfig.ExtendedResourceName, skipDaemonSetPods, skipMirrorPods, currentTime)
 		if err != nil {
-			klog.V(3).Infof("node %s has unready GPU resource: %s", nodeInfo.Node().Name, gpuConfig.ResourceName)
+			klog.V(3).Infof("node %s has unready GPU resource: %s", nodeInfo.Node().Name, gpuConfig.ExtendedResourceName)
 			// Return 0 if GPU is unready. This will guarantee we can still scale down a node with unready GPU.
-			return Info{GpuUtil: 0, ResourceName: gpuConfig.ResourceName, Utilization: 0}, nil
+			return Info{GpuUtil: 0, ResourceName: gpuConfig.ExtendedResourceName, Utilization: 0}, nil
 		}
 		// Skips cpu and memory utilization calculation for node with GPU.
-		return Info{GpuUtil: gpuUtil, ResourceName: gpuConfig.ResourceName, Utilization: gpuUtil}, err
+		return Info{GpuUtil: gpuUtil, ResourceName: gpuConfig.ExtendedResourceName, Utilization: gpuUtil}, err
 	}
 
 	if draEnabled && len(nodeInfo.LocalResourceSlices) > 0 {
diff --git a/cluster-autoscaler/simulator/utilization/info_test.go b/cluster-autoscaler/simulator/utilization/info_test.go
@@ -343,7 +343,7 @@ func TestCalculateWithDynamicResources(t *testing.T) {
 			nodeInfo:     nodeInfoGpuAndDra,
 			gpuConfig:    gpuConfig,
 			draEnabled:   true,
-			wantUtilInfo: Info{Utilization: 0, ResourceName: gpuConfig.ResourceName},
+			wantUtilInfo: Info{Utilization: 0, ResourceName: gpuConfig.ExtendedResourceName},
 		},
 		{
 			testName:     "DRA slices and claims present, DRA enabled, error while calculating DRA util -> error returned",
@@ -371,9 +371,9 @@ func getGpuConfigFromNode(node *apiv1.Node) *cloudprovider.GpuConfig {
 	gpuAllocatable, hasGpuAllocatable := node.Status.Allocatable[gpu.ResourceNvidiaGPU]
 	if hasGpuLabel || (hasGpuAllocatable && !gpuAllocatable.IsZero()) {
 		return &cloudprovider.GpuConfig{
-			Label:        gpuLabel,
-			Type:         gpuType,
-			ResourceName: gpu.ResourceNvidiaGPU,
+			Label:                gpuLabel,
+			Type:                 gpuType,
+			ExtendedResourceName: gpu.ResourceNvidiaGPU,
 		}
 	}
 	return nil
diff --git a/cluster-autoscaler/utils/gpu/gpu.go b/cluster-autoscaler/utils/gpu/gpu.go
@@ -56,7 +56,7 @@ func GetGpuInfoForMetrics(gpuConfig *cloudprovider.GpuConfig, availableGPUTypes
 	if gpuConfig == nil {
 		return "", MetricsNoGPU
 	}
-	resourceName := gpuConfig.ResourceName
+	resourceName := gpuConfig.ExtendedResourceName
 	capacity, capacityFound := node.Status.Capacity[resourceName]
 	// There is no label value, fallback to generic solution
 	if gpuConfig.Type == "" && capacityFound && !capacity.IsZero() {
@@ -116,7 +116,7 @@ func PodRequestsGpu(pod *apiv1.Pod) bool {
 func GetNodeGPUFromCloudProvider(provider cloudprovider.CloudProvider, node *apiv1.Node) *cloudprovider.GpuConfig {
 	gpuLabel := provider.GPULabel()
 	if NodeHasGpu(gpuLabel, node) {
-		return &cloudprovider.GpuConfig{Label: gpuLabel, Type: node.Labels[gpuLabel], ResourceName: ResourceNvidiaGPU}
+		return &cloudprovider.GpuConfig{Label: gpuLabel, Type: node.Labels[gpuLabel], ExtendedResourceName: ResourceNvidiaGPU}
 	}
 	return nil
 }

Original file line number	Diff line number	Diff line change
`@@ -56,7 +56,7 @@ func GetGpuInfoForMetrics(gpuConfig *cloudprovider.GpuConfig, availableGPUTypes`
`56`	`56`	`if gpuConfig == nil {`
`57`	`57`	`return "", MetricsNoGPU`
`58`	`58`	`}`
`59`		`- resourceName := gpuConfig.ResourceName`
	`59`	`+ resourceName := gpuConfig.ExtendedResourceName`
`60`	`60`	`capacity, capacityFound := node.Status.Capacity[resourceName]`
`61`	`61`	`// There is no label value, fallback to generic solution`
`62`	`62`	`if gpuConfig.Type == "" && capacityFound && !capacity.IsZero() {`
`@@ -116,7 +116,7 @@ func PodRequestsGpu(pod *apiv1.Pod) bool {`
`116`	`116`	`func GetNodeGPUFromCloudProvider(provider cloudprovider.CloudProvider, node *apiv1.Node) *cloudprovider.GpuConfig {`
`117`	`117`	`gpuLabel := provider.GPULabel()`
`118`	`118`	`if NodeHasGpu(gpuLabel, node) {`
`119`		`- return &cloudprovider.GpuConfig{Label: gpuLabel, Type: node.Labels[gpuLabel], ResourceName: ResourceNvidiaGPU}`
	`119`	`+ return &cloudprovider.GpuConfig{Label: gpuLabel, Type: node.Labels[gpuLabel], ExtendedResourceName: ResourceNvidiaGPU}`
`120`	`120`	`}`
`121`	`121`	`return nil`
`122`	`122`	`}`