Commit cef695b

Merge pull request #8547 from mtrqq/dra-gpu-processor

Disable GPU resource processor for nodes using DRA for accelerator attachment

2 parents 191c33b + d529b17

10 files changed: +310 −47 lines changed

cluster-autoscaler/cloudprovider/cloud_provider.go

Lines changed: 10 additions & 3 deletions

@@ -98,9 +98,16 @@ const (
 
 // GpuConfig contains the label, type and the resource name for a GPU.
 type GpuConfig struct {
-    Label        string
-    Type         string
-    ResourceName apiv1.ResourceName
+    Label                string
+    Type                 string
+    ExtendedResourceName apiv1.ResourceName
+    DraDriverName        string
+}
+
+// ExposedViaDra determines whether a GPU described in the config
+// is exposed via device plugin or DRA driver.
+func (gpu *GpuConfig) ExposedViaDra() bool {
+    return gpu.DraDriverName != ""
 }
 
 // CloudProvider contains configuration info and functions for interacting with
Lines changed: 31 additions & 0 deletions (new file in package gce)

@@ -0,0 +1,31 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package gce
+
+import apiv1 "k8s.io/api/core/v1"
+
+const (
+    // DraGPUDriver is the name of the driver used to expose NVIDIA GPU resources.
+    DraGPUDriver = "gpu.nvidia.com"
+    // DraGPULabel is the label added to nodes with GPU resources exposed via DRA.
+    DraGPULabel = "cloud.google.com/gke-gpu-dra-driver"
+)
+
+// GpuDraDriverEnabled checks whether the DRA GPU driver is enabled on the node.
+func GpuDraDriverEnabled(node *apiv1.Node) bool {
+    return node.Labels[DraGPULabel] == "true"
+}
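
An illustrative check of the helper (not from the commit; assumes the usual apiv1/metav1 imports): the label alone signals DRA attachment, independent of any nvidia.com/gpu allocatable.

// Illustrative sketch, not part of the commit.
draNode := &apiv1.Node{ObjectMeta: metav1.ObjectMeta{
    Labels: map[string]string{DraGPULabel: "true"},
}}
fmt.Println(GpuDraDriverEnabled(draNode))       // true: label set to "true"
fmt.Println(GpuDraDriverEnabled(&apiv1.Node{})) // false: label absent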

cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go

Lines changed: 13 additions & 2 deletions

@@ -82,9 +82,20 @@ func (gce *GceCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
 }
 
 // GetNodeGpuConfig returns the label, type and resource name for the GPU added to node. If node doesn't have
-// any GPUs, it returns nil.
+// any GPUs, it returns nil. If the node has a GPU attached using DRA, it populates the corresponding field in GpuConfig.
 func (gce *GceCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider.GpuConfig {
-    return gpu.GetNodeGPUFromCloudProvider(gce, node)
+    gpuConfig := gpu.GetNodeGPUFromCloudProvider(gce, node)
+
+    // If GPU devices are exposed using DRA, the extended resource
+    // won't be present in the node allocatable or capacity,
+    // so we overwrite the extended resource name as it won't ever
+    // be there.
+    if GpuDraDriverEnabled(node) {
+        gpuConfig.DraDriverName = DraGPUDriver
+        gpuConfig.ExtendedResourceName = ""
+    }
+
+    return gpuConfig
 }
 
 // NodeGroups returns all node groups configured for this cloud provider.

cluster-autoscaler/cloudprovider/kwok/kwok_provider_test.go

Lines changed: 3 additions & 3 deletions

@@ -486,7 +486,7 @@ func TestGetNodeGpuConfig(t *testing.T) {
 	l := p.GetNodeGpuConfig(nodeWithGPU)
 	assert.NotNil(t, l)
 	assert.Equal(t, "k8s.amazonaws.com/accelerator", l.Label)
-	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ResourceName))
+	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ExtendedResourceName))
 	assert.Equal(t, "nvidia-tesla-k80", l.Type)
 
 	nodeWithNoAllocatableGPU := &apiv1.Node{
@@ -499,7 +499,7 @@ func TestGetNodeGpuConfig(t *testing.T) {
 	l = p.GetNodeGpuConfig(nodeWithNoAllocatableGPU)
 	assert.NotNil(t, l)
 	assert.Equal(t, "k8s.amazonaws.com/accelerator", l.Label)
-	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ResourceName))
+	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ExtendedResourceName))
 	assert.Equal(t, "nvidia-tesla-k80", l.Type)
 
 	nodeWithNoGPULabel := &apiv1.Node{
@@ -515,7 +515,7 @@ func TestGetNodeGpuConfig(t *testing.T) {
 	l = p.GetNodeGpuConfig(nodeWithNoGPULabel)
 	assert.NotNil(t, l)
 	assert.Equal(t, "k8s.amazonaws.com/accelerator", l.Label)
-	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ResourceName))
+	assert.Equal(t, gpu.ResourceNvidiaGPU, string(l.ExtendedResourceName))
 	assert.Equal(t, "", l.Type)
 
 }

cluster-autoscaler/processors/customresources/gpu_processor.go

Lines changed: 24 additions & 6 deletions

@@ -42,12 +42,14 @@ func (p *GpuCustomResourcesProcessor) FilterOutNodesWithUnreadyResources(context
 	newReadyNodes := make([]*apiv1.Node, 0)
 	nodesWithUnreadyGpu := make(map[string]*apiv1.Node)
 	for _, node := range readyNodes {
+		if gpuExposedViaDra(context, node) {
+			newReadyNodes = append(newReadyNodes, node)
+			continue
+		}
+
 		_, hasGpuLabel := node.Labels[context.CloudProvider.GPULabel()]
 		gpuAllocatable, hasGpuAllocatable := node.Status.Allocatable[gpu.ResourceNvidiaGPU]
 		directXAllocatable, hasDirectXAllocatable := node.Status.Allocatable[gpu.ResourceDirectX]
-		// We expect node to have GPU based on label, but it doesn't show up
-		// on node object. Assume the node is still not fully started (installing
-		// GPU drivers).
 		if hasGpuLabel && ((!hasGpuAllocatable || gpuAllocatable.IsZero()) && (!hasDirectXAllocatable || directXAllocatable.IsZero())) {
 			klog.V(3).Infof("Overriding status of node %v, which seems to have unready GPU",
 				node.Name)
@@ -70,18 +72,22 @@ func (p *GpuCustomResourcesProcessor) FilterOutNodesWithUnreadyResources(context
 // GetNodeResourceTargets returns mapping of resource names to their targets.
 // This includes resources which are not yet ready to use and visible in kubernetes.
 func (p *GpuCustomResourcesProcessor) GetNodeResourceTargets(context *context.AutoscalingContext, node *apiv1.Node, nodeGroup cloudprovider.NodeGroup) ([]CustomResourceTarget, errors.AutoscalerError) {
-	gpuTarget, err := p.GetNodeGpuTarget(context.CloudProvider.GPULabel(), node, nodeGroup)
+	gpuTarget, err := p.GetNodeGpuTarget(context, node, nodeGroup)
 	return []CustomResourceTarget{gpuTarget}, err
 }
 
 // GetNodeGpuTarget returns the gpu target of a given node. This includes gpus
 // that are not ready to use and visible in kubernetes.
-func (p *GpuCustomResourcesProcessor) GetNodeGpuTarget(GPULabel string, node *apiv1.Node, nodeGroup cloudprovider.NodeGroup) (CustomResourceTarget, errors.AutoscalerError) {
-	gpuLabel, found := node.Labels[GPULabel]
+func (p *GpuCustomResourcesProcessor) GetNodeGpuTarget(context *context.AutoscalingContext, node *apiv1.Node, nodeGroup cloudprovider.NodeGroup) (CustomResourceTarget, errors.AutoscalerError) {
+	gpuLabel, found := node.Labels[context.CloudProvider.GPULabel()]
 	if !found {
 		return CustomResourceTarget{}, nil
 	}
 
+	if gpuExposedViaDra(context, node) {
+		return CustomResourceTarget{}, nil
+	}
+
 	gpuAllocatable, found := node.Status.Allocatable[gpu.ResourceNvidiaGPU]
 	if found && gpuAllocatable.Value() > 0 {
 		return CustomResourceTarget{gpuLabel, gpuAllocatable.Value()}, nil
@@ -121,3 +127,15 @@ func (p *GpuCustomResourcesProcessor) GetNodeGpuTarget(GPULabel string, node *ap
 // CleanUp cleans up processor's internal structures.
 func (p *GpuCustomResourcesProcessor) CleanUp() {
 }
+
+func gpuExposedViaDra(ctx *context.AutoscalingContext, node *apiv1.Node) bool {
+	gpuConfig := ctx.CloudProvider.GetNodeGpuConfig(node)
+	if gpuConfig == nil {
+		return false
+	}
+
+	// Devices attached through DRA do not use node allocatable
+	// to confirm their attachment; assume the node is ready here,
+	// as it will be checked in a separate processor.
+	return gpuConfig.ExposedViaDra()
+}
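
Condensed, the readiness rule after this change, ignoring the DirectX case (an illustrative standalone helper, not code from the commit):

// Illustrative sketch, not part of the commit.
func treatGpuNodeAsReady(exposedViaDra, hasGpuLabel bool, gpuAllocatable int64) bool {
    if exposedViaDra {
        // DRA devices never show up in node allocatable, so the
        // allocatable-based unreadiness check does not apply here.
        return true
    }
    // Otherwise a GPU label without non-zero allocatable means the node
    // is likely still starting up (e.g. installing GPU drivers).
    return !hasGpuLabel || gpuAllocatable > 0
}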

cluster-autoscaler/processors/customresources/gpu_processor_test.go

Lines changed: 17 additions & 0 deletions

@@ -25,6 +25,7 @@ import (
 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce"
 	testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
 	"k8s.io/autoscaler/cluster-autoscaler/context"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
@@ -152,13 +153,28 @@ func TestFilterOutNodesWithUnreadyResources(t *testing.T) {
 	}
 	expectedReadiness[nodeNoGpuUnready.Name] = false
 
+	nodeGPUReadyDra := &apiv1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "nodeGPUViaDra",
+			Labels: map[string]string{
+				gce.DraGPULabel: "true",
+			},
+			CreationTimestamp: metav1.NewTime(start),
+		},
+		Status: apiv1.NodeStatus{
+			Conditions: []apiv1.NodeCondition{readyCondition},
+		},
+	}
+	expectedReadiness[nodeGPUReadyDra.Name] = true
+
 	initialReadyNodes := []*apiv1.Node{
 		nodeGpuReady,
 		nodeGpuUnready,
 		nodeGpuUnready2,
 		nodeDirectXReady,
 		nodeDirectXUnready,
 		nodeNoGpuReady,
+		nodeGPUReadyDra,
 	}
 	initialAllNodes := []*apiv1.Node{
 		nodeGpuReady,
@@ -168,6 +184,7 @@ func TestFilterOutNodesWithUnreadyResources(t *testing.T) {
 		nodeDirectXUnready,
 		nodeNoGpuReady,
 		nodeNoGpuUnready,
+		nodeGPUReadyDra,
 	}
 
 	processor := GpuCustomResourcesProcessor{}

cluster-autoscaler/simulator/utilization/info.go

Lines changed: 5 additions & 5 deletions

@@ -48,15 +48,15 @@ type Info struct {
 // utilization is the sum of requests for it divided by allocatable. It also
 // returns the individual cpu, memory and gpu utilization.
 func Calculate(nodeInfo *framework.NodeInfo, skipDaemonSetPods, skipMirrorPods, draEnabled bool, gpuConfig *cloudprovider.GpuConfig, currentTime time.Time) (utilInfo Info, err error) {
-	if gpuConfig != nil {
-		gpuUtil, err := CalculateUtilizationOfResource(nodeInfo, gpuConfig.ResourceName, skipDaemonSetPods, skipMirrorPods, currentTime)
+	if gpuConfig != nil && !gpuConfig.ExposedViaDra() {
+		gpuUtil, err := CalculateUtilizationOfResource(nodeInfo, gpuConfig.ExtendedResourceName, skipDaemonSetPods, skipMirrorPods, currentTime)
 		if err != nil {
-			klog.V(3).Infof("node %s has unready GPU resource: %s", nodeInfo.Node().Name, gpuConfig.ResourceName)
+			klog.V(3).Infof("node %s has unready GPU resource: %s", nodeInfo.Node().Name, gpuConfig.ExtendedResourceName)
 			// Return 0 if GPU is unready. This will guarantee we can still scale down a node with unready GPU.
-			return Info{GpuUtil: 0, ResourceName: gpuConfig.ResourceName, Utilization: 0}, nil
+			return Info{GpuUtil: 0, ResourceName: gpuConfig.ExtendedResourceName, Utilization: 0}, nil
 		}
 		// Skips cpu and memory utilization calculation for node with GPU.
-		return Info{GpuUtil: gpuUtil, ResourceName: gpuConfig.ResourceName, Utilization: gpuUtil}, err
+		return Info{GpuUtil: gpuUtil, ResourceName: gpuConfig.ExtendedResourceName, Utilization: gpuUtil}, err
 	}
 
 	if draEnabled && len(nodeInfo.LocalResourceSlices) > 0 {
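
The guard changes which branch a DRA-exposed GpuConfig takes: it now falls through to the DRA ResourceSlice path below instead of reading a nonexistent extended resource. As a standalone truth table (illustrative, not code from the commit):

// Illustrative sketch, not part of the commit.
func utilizationPath(hasGpuConfig, gpuViaDra, draEnabled, hasResourceSlices bool) string {
    switch {
    case hasGpuConfig && !gpuViaDra:
        return "extended-resource GPU utilization"
    case draEnabled && hasResourceSlices:
        return "DRA ResourceSlice utilization"
    default:
        return "cpu/memory utilization"
    }
}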

cluster-autoscaler/simulator/utilization/info_test.go

Lines changed: 33 additions & 17 deletions

@@ -82,7 +82,7 @@ func TestCalculate(t *testing.T) {
 	SetNodeReadyState(node, true, time.Time{})
 	nodeInfo := framework.NewTestNodeInfo(node, pod, pod, pod2)
 
-	gpuConfig := getGpuConfigFromNode(nodeInfo.Node())
+	gpuConfig := getGpuConfigFromNode(nodeInfo.Node(), false)
 	utilInfo, err := Calculate(nodeInfo, false, false, false, gpuConfig, testTime)
 	assert.NoError(t, err)
 	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
@@ -91,15 +91,15 @@ func TestCalculate(t *testing.T) {
 	node2 := BuildTestNode("node2", 2000, -1)
 	nodeInfo = framework.NewTestNodeInfo(node2, pod, pod, pod2)
 
-	gpuConfig = getGpuConfigFromNode(nodeInfo.Node())
+	gpuConfig = getGpuConfigFromNode(nodeInfo.Node(), false)
 	_, err = Calculate(nodeInfo, false, false, false, gpuConfig, testTime)
 	assert.Error(t, err)
 
 	node3 := BuildTestNode("node3", 2000, 2000000)
 	SetNodeReadyState(node3, true, time.Time{})
 	nodeInfo = framework.NewTestNodeInfo(node3, pod, podWithInitContainers, podWithLargeNonRestartableInitContainers)
 
-	gpuConfig = getGpuConfigFromNode(nodeInfo.Node())
+	gpuConfig = getGpuConfigFromNode(nodeInfo.Node(), false)
 	utilInfo, err = Calculate(nodeInfo, false, false, false, gpuConfig, testTime)
 	assert.NoError(t, err)
 	assert.InEpsilon(t, 50.25, utilInfo.Utilization, 0.01)
@@ -113,21 +113,21 @@ func TestCalculate(t *testing.T) {
 	daemonSetPod4.Annotations = map[string]string{"cluster-autoscaler.kubernetes.io/daemonset-pod": "true"}
 
 	nodeInfo = framework.NewTestNodeInfo(node, pod, pod, pod2, daemonSetPod3, daemonSetPod4)
-	gpuConfig = getGpuConfigFromNode(nodeInfo.Node())
+	gpuConfig = getGpuConfigFromNode(nodeInfo.Node(), false)
 	utilInfo, err = Calculate(nodeInfo, true, false, false, gpuConfig, testTime)
 	assert.NoError(t, err)
 	assert.InEpsilon(t, 2.5/10, utilInfo.Utilization, 0.01)
 
 	nodeInfo = framework.NewTestNodeInfo(node, pod, pod2, daemonSetPod3)
-	gpuConfig = getGpuConfigFromNode(nodeInfo.Node())
+	gpuConfig = getGpuConfigFromNode(nodeInfo.Node(), false)
 	utilInfo, err = Calculate(nodeInfo, false, false, false, gpuConfig, testTime)
 	assert.NoError(t, err)
 	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
 
 	terminatedPod := BuildTestPod("podTerminated", 100, 200000)
 	terminatedPod.DeletionTimestamp = &metav1.Time{Time: testTime.Add(-10 * time.Minute)}
 	nodeInfo = framework.NewTestNodeInfo(node, pod, pod, pod2, terminatedPod)
-	gpuConfig = getGpuConfigFromNode(nodeInfo.Node())
+	gpuConfig = getGpuConfigFromNode(nodeInfo.Node(), false)
 	utilInfo, err = Calculate(nodeInfo, false, false, false, gpuConfig, testTime)
 	assert.NoError(t, err)
 	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
@@ -138,19 +138,19 @@ func TestCalculate(t *testing.T) {
 	}
 
 	nodeInfo = framework.NewTestNodeInfo(node, pod, pod, pod2, mirrorPod)
-	gpuConfig = getGpuConfigFromNode(nodeInfo.Node())
+	gpuConfig = getGpuConfigFromNode(nodeInfo.Node(), false)
 	utilInfo, err = Calculate(nodeInfo, false, true, false, gpuConfig, testTime)
 	assert.NoError(t, err)
 	assert.InEpsilon(t, 2.0/9.0, utilInfo.Utilization, 0.01)
 
 	nodeInfo = framework.NewTestNodeInfo(node, pod, pod2, mirrorPod)
-	gpuConfig = getGpuConfigFromNode(nodeInfo.Node())
+	gpuConfig = getGpuConfigFromNode(nodeInfo.Node(), false)
 	utilInfo, err = Calculate(nodeInfo, false, false, false, gpuConfig, testTime)
 	assert.NoError(t, err)
 	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
 
 	nodeInfo = framework.NewTestNodeInfo(node, pod, mirrorPod, daemonSetPod3)
-	gpuConfig = getGpuConfigFromNode(nodeInfo.Node())
+	gpuConfig = getGpuConfigFromNode(nodeInfo.Node(), false)
 	utilInfo, err = Calculate(nodeInfo, true, true, false, gpuConfig, testTime)
 	assert.NoError(t, err)
 	assert.InEpsilon(t, 1.0/8.0, utilInfo.Utilization, 0.01)
@@ -161,7 +161,7 @@ func TestCalculate(t *testing.T) {
 	RequestGpuForPod(gpuPod, 1)
 	TolerateGpuForPod(gpuPod)
 	nodeInfo = framework.NewTestNodeInfo(gpuNode, pod, pod, gpuPod)
-	gpuConfig = getGpuConfigFromNode(nodeInfo.Node())
+	gpuConfig = getGpuConfigFromNode(nodeInfo.Node(), false)
 	utilInfo, err = Calculate(nodeInfo, false, false, false, gpuConfig, testTime)
 	assert.NoError(t, err)
 	assert.InEpsilon(t, 1/1, utilInfo.Utilization, 0.01)
@@ -170,7 +170,7 @@ func TestCalculate(t *testing.T) {
 	gpuNode = BuildTestNode("gpu_node", 2000, 2000000)
 	AddGpuLabelToNode(gpuNode)
 	nodeInfo = framework.NewTestNodeInfo(gpuNode, pod, pod)
-	gpuConfig = getGpuConfigFromNode(nodeInfo.Node())
+	gpuConfig = getGpuConfigFromNode(nodeInfo.Node(), false)
 	utilInfo, err = Calculate(nodeInfo, false, false, false, gpuConfig, testTime)
 	assert.NoError(t, err)
 	assert.Zero(t, utilInfo.Utilization)
@@ -182,7 +182,8 @@ func TestCalculateWithDynamicResources(t *testing.T) {
 	gpuNode := BuildTestNode("gpuNode", 1000, 1000)
 	AddGpusToNode(gpuNode, 1)
 	AddGpuLabelToNode(gpuNode)
-	gpuConfig := getGpuConfigFromNode(gpuNode)
+	gpuConfig := getGpuConfigFromNode(gpuNode, false)
+	gpuConfigDra := getGpuConfigFromNode(gpuNode, true)
 	pod1 := BuildTestPod("pod1", 250, 0, WithNodeName("node"))
 	pod2 := BuildTestPod("pod2", 250, 0, WithNodeName("node"))
 	resourceSlice1 := &resourceapi.ResourceSlice{
@@ -343,7 +344,14 @@ func TestCalculateWithDynamicResources(t *testing.T) {
 			nodeInfo:     nodeInfoGpuAndDra,
 			gpuConfig:    gpuConfig,
 			draEnabled:   true,
-			wantUtilInfo: Info{Utilization: 0, ResourceName: gpuConfig.ResourceName},
+			wantUtilInfo: Info{Utilization: 0, ResourceName: gpuConfig.ExtendedResourceName},
+		},
+		{
+			testName:     "DRA slices and claims present, DRA enabled, DRA GPU config passed -> DRA util returned",
+			nodeInfo:     nodeInfoGpuAndDra,
+			gpuConfig:    gpuConfigDra,
+			draEnabled:   true,
+			wantUtilInfo: Info{DynamicResourceUtil: 0.8, Utilization: 0.8, ResourceName: apiv1.ResourceName("driver.foo.com/node-pool1")},
 		},
 		{
 			testName: "DRA slices and claims present, DRA enabled, error while calculating DRA util -> error returned",
@@ -365,15 +373,23 @@ func TestCalculateWithDynamicResources(t *testing.T) {
 	}
 }
 
-func getGpuConfigFromNode(node *apiv1.Node) *cloudprovider.GpuConfig {
+func getGpuConfigFromNode(node *apiv1.Node, dra bool) *cloudprovider.GpuConfig {
 	gpuLabel := "cloud.google.com/gke-accelerator"
 	gpuType, hasGpuLabel := node.Labels[gpuLabel]
 	gpuAllocatable, hasGpuAllocatable := node.Status.Allocatable[gpu.ResourceNvidiaGPU]
 	if hasGpuLabel || (hasGpuAllocatable && !gpuAllocatable.IsZero()) {
+		if !dra {
+			return &cloudprovider.GpuConfig{
+				Label:                gpuLabel,
+				Type:                 gpuType,
+				ExtendedResourceName: gpu.ResourceNvidiaGPU,
+			}
+		}
+
 		return &cloudprovider.GpuConfig{
-			Label:        gpuLabel,
-			Type:         gpuType,
-			ResourceName: gpu.ResourceNvidiaGPU,
+			Label:         gpuLabel,
+			Type:          gpuType,
+			DraDriverName: "gpu.nvidia.com",
 		}
 	}
 	return nil
