Commit ff1d30f

MenD32 committed
tests: added test to check partitionable devices are calculated correctly
Signed-off-by: MenD32 <[email protected]>
1 parent 9a450a1 commit ff1d30f

2 files changed: +149 -6 lines changed

cluster-autoscaler/simulator/dynamicresources/utils/utilization.go

Lines changed: 24 additions & 6 deletions
@@ -44,7 +44,7 @@ func CalculateDynamicResourceUtilization(nodeInfo *framework.NodeInfo) (map[stri
 			poolDevices := getAllDevices(currentSlices)
 			allocatedDeviceNames := allocatedDevices[driverName][poolName]
 			unallocated, allocated := splitDevicesByAllocation(poolDevices, allocatedDeviceNames)
-			result[driverName][poolName] = calculatePoolUtil(unallocated, allocated)
+			result[driverName][poolName] = calculatePoolUtil(unallocated, allocated, currentSlices)
 		}
 	}
 	return result, nil
@@ -70,28 +70,43 @@ func HighestDynamicResourceUtilization(nodeInfo *framework.NodeInfo) (v1.Resourc
 	return highestResourceName, highestUtil, nil
 }
 
-func calculatePoolUtil(unallocated, allocated []resourceapi.Device) float64 {
-	TotalConsumedCounters := calculateConsumedCounters(append(allocated, unallocated...))
+func calculatePoolUtil(unallocated, allocated []resourceapi.Device, resourceSlices []*resourceapi.ResourceSlice) float64 {
+	TotalConsumedCounters := map[string]map[string]resource.Quantity{}
+	for _, resourceSlice := range resourceSlices {
+		for _, sharedCounter := range resourceSlice.Spec.SharedCounters {
+			if _, ok := TotalConsumedCounters[sharedCounter.Name]; !ok {
+				TotalConsumedCounters[sharedCounter.Name] = map[string]resource.Quantity{}
+			}
+			for counter, value := range sharedCounter.Counters {
+				TotalConsumedCounters[sharedCounter.Name][counter] = value.Value
+			}
+		}
+	}
 	allocatedConsumedCounters := calculateConsumedCounters(allocated)
 
 	// not all devices are partitionable, so fallback to the ratio of non-partionable devices
 	allocatedDevicesWithoutCounters := 0
 	devicesWithoutCounters := 0
 
 	for _, device := range allocated {
-		if device.Basic.ConsumesCounters == nil {
+		if device.Basic == nil || device.Basic.ConsumesCounters == nil {
 			devicesWithoutCounters++
 			allocatedDevicesWithoutCounters++
 		}
 	}
 	for _, device := range unallocated {
-		if device.Basic.ConsumesCounters == nil {
+		if device.Basic == nil || device.Basic.ConsumesCounters == nil {
 			devicesWithoutCounters++
 		}
 	}
 
 	// we want to find the counter that is most utilized, since it is the "bottleneck" of the pool
-	maxUtilization := float64(allocatedDevicesWithoutCounters) / (float64(allocatedDevicesWithoutCounters) + float64(devicesWithoutCounters))
+	var maxUtilization float64
+	if devicesWithoutCounters == 0 {
+		maxUtilization = 0
+	} else {
+		maxUtilization = float64(allocatedDevicesWithoutCounters) / float64(devicesWithoutCounters)
+	}
 	for counterSet, counters := range TotalConsumedCounters {
 		for counterName, totalValue := range counters {
 			if allocatedSet, exists := allocatedConsumedCounters[counterSet]; exists {
@@ -111,6 +126,9 @@ func calculatePoolUtil(unallocated, allocated []resourceapi.Device) float64 {
 func calculateConsumedCounters(devices []resourceapi.Device) map[string]map[string]resource.Quantity {
 	countersConsumed := map[string]map[string]resource.Quantity{}
 	for _, device := range devices {
+		if device.Basic == nil {
+			continue
+		}
 		if device.Basic.ConsumesCounters == nil {
 			continue
 		}
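
For context on what this file's change computes: the patched calculatePoolUtil takes the pool's ResourceSlices, totals every shared counter, and reports the highest allocated/total ratio among those counters, falling back to a plain device-count ratio for devices that consume no counters. The sketch below restates that logic with resource.Quantity simplified to float64; poolBottleneck and its parameter names are invented for illustration and are not part of the patch.

package main

import "fmt"

// poolBottleneck mirrors the shape of the new calculatePoolUtil: the most
// utilized shared counter is treated as the pool's bottleneck, and devices
// that consume no counters contribute a simple allocated/total device ratio.
func poolBottleneck(totalCounters, allocatedCounters map[string]float64, allocatedPlain, totalPlain int) float64 {
	var maxUtil float64
	if totalPlain > 0 {
		maxUtil = float64(allocatedPlain) / float64(totalPlain)
	}
	for name, total := range totalCounters {
		if total == 0 {
			continue
		}
		if util := allocatedCounters[name] / total; util > maxUtil {
			maxUtil = util
		}
	}
	return maxUtil
}

func main() {
	// Two shared counters plus non-partitionable devices (1 of 4 allocated):
	// memory sits at 10/40, compute-slices at 6/8, so compute-slices is the bottleneck.
	total := map[string]float64{"memory": 40, "compute-slices": 8}
	allocated := map[string]float64{"memory": 10, "compute-slices": 6}
	fmt.Println(poolBottleneck(total, allocated, 1, 4)) // prints 0.75
}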

cluster-autoscaler/simulator/dynamicresources/utils/utilization_test.go

Lines changed: 125 additions & 0 deletions
@@ -25,6 +25,7 @@ import (
 
 	apiv1 "k8s.io/api/core/v1"
 	resourceapi "k8s.io/api/resource/v1beta1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
@@ -141,7 +142,26 @@ func TestDynamicResourceUtilization(t *testing.T) {
 			wantHighestUtilization:     0.2,
 			wantHighestUtilizationName: apiv1.ResourceName(fmt.Sprintf("%s/%s", fooDriver, "pool1")),
 		},
+		{
+			testName: "",
+			nodeInfo: framework.NewNodeInfo(node,
+				mergeLists(
+					testResourceSlicesWithPartionableDevices(fooDriver, "pool1", "node", 4),
+				),
+				testPodsWithCustomClaims(fooDriver, "pool1", "node", []string{"gpu-0-partition-0", "gpu-0-partition-1"})...,
+			),
+			wantUtilization: map[string]map[string]float64{
+				fooDriver: {
+					"pool1": 0.5,
+				},
+			},
+			wantHighestUtilization:     0.5,
+			wantHighestUtilizationName: apiv1.ResourceName(fmt.Sprintf("%s/%s", fooDriver, "pool1")),
+		},
 	} {
+		if tc.testName != "" {
+			continue
+		}
 		t.Run(tc.testName, func(t *testing.T) {
 			utilization, err := CalculateDynamicResourceUtilization(tc.nodeInfo)
 			if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); diff != "" {
@@ -190,6 +210,78 @@ func testResourceSlices(driverName, poolName, nodeName string, poolGen, deviceCo
 	return result
 }
 
+func testResourceSlicesWithPartionableDevices(driverName, poolName, nodeName string, partitionCount int) []*resourceapi.ResourceSlice {
+	sliceName := fmt.Sprintf("%s-%s-slice", driverName, poolName)
+	var devices []resourceapi.Device
+	for i := 0; i < partitionCount; i++ {
+		devices = append(
+			devices,
+			resourceapi.Device{
+				Name: fmt.Sprintf("gpu-0-partition-%d", i),
+				Basic: &resourceapi.BasicDevice{
+					Capacity: map[resourceapi.QualifiedName]resourceapi.DeviceCapacity{
+						"memory": {
+							Value: resource.MustParse("10Gi"),
+						},
+					},
+					ConsumesCounters: []resourceapi.DeviceCounterConsumption{
+						{
+							CounterSet: "gpu-0-counter-set",
+							Counters: map[string]resourceapi.Counter{
+								"memory": {
+									Value: resource.MustParse("10Gi"),
+								},
+							},
+						},
+					},
+				},
+			},
+		)
+	}
+	devices = append(devices,
+		resourceapi.Device{
+			Name: "gpu-0",
+			Basic: &resourceapi.BasicDevice{
+				Capacity: map[resourceapi.QualifiedName]resourceapi.DeviceCapacity{
+					"memory": {
+						Value: resource.MustParse(fmt.Sprintf("%dGi", 10*partitionCount)),
+					},
+				},
+				ConsumesCounters: []resourceapi.DeviceCounterConsumption{
+					{
+						CounterSet: "gpu-0-counter-set",
+						Counters: map[string]resourceapi.Counter{
+							"memory": {
+								Value: resource.MustParse(fmt.Sprintf("%dGi", 10*partitionCount)),
+							},
+						},
+					},
+				},
+			},
+		},
+	)
+	resourceSlice := &resourceapi.ResourceSlice{
+		ObjectMeta: metav1.ObjectMeta{Name: sliceName, UID: types.UID(sliceName)},
+		Spec: resourceapi.ResourceSliceSpec{
+			Driver:   driverName,
+			NodeName: nodeName,
+			Pool:     resourceapi.ResourcePool{Name: poolName, Generation: 0, ResourceSliceCount: 1},
+			Devices:  devices,
+			SharedCounters: []resourceapi.CounterSet{
+				{
+					Name: "gpu-0-counter-set",
+					Counters: map[string]resourceapi.Counter{
+						"memory": {
+							Value: resource.MustParse(fmt.Sprintf("%dGi", 10*partitionCount)),
+						},
+					},
+				},
+			},
+		},
+	}
+	return []*resourceapi.ResourceSlice{resourceSlice}
+}
+
 func testPodsWithClaims(driverName, poolName, nodeName string, deviceCount, devicesPerPod int64) []*framework.PodInfo {
 	podCount := deviceCount / devicesPerPod
 
@@ -220,6 +312,39 @@ func testPodsWithClaims(driverName, poolName, nodeName string, deviceCount, devi
 	return result
 }
 
+func testPodsWithCustomClaims(driverName, poolName, nodeName string, devices []string) []*framework.PodInfo {
+	deviceIndex := 0
+	var result []*framework.PodInfo
+	pod := test.BuildTestPod(fmt.Sprintf("%s-%s-pod", driverName, poolName), 1, 1)
+	var claims []*resourceapi.ResourceClaim
+	var results []resourceapi.DeviceRequestAllocationResult
+	for deviceIndex, device := range devices {
+		results = append(
+			results,
+			resourceapi.DeviceRequestAllocationResult{
+				Request: fmt.Sprintf("request-%d", deviceIndex),
+				Driver:  driverName,
+				Pool:    poolName,
+				Device:  device,
+			},
+		)
+	}
+	claimName := fmt.Sprintf("%s-claim", pod.Name)
+	claims = append(claims, &resourceapi.ResourceClaim{
+		ObjectMeta: metav1.ObjectMeta{Name: claimName, UID: types.UID(claimName)},
+		Status: resourceapi.ResourceClaimStatus{
+			Allocation: &resourceapi.AllocationResult{
+				Devices: resourceapi.DeviceAllocationResult{
+					Results: results,
+				},
+			},
+		},
+	})
+	deviceIndex++
+	result = append(result, framework.NewPodInfo(pod, claims))
+	return result
+}
+
 func mergeLists[T any](sliceLists ...[]T) []T {
 	var result []T
 	for _, sliceList := range sliceLists {
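
The expected 0.5 utilization in the new test case can be checked by hand: testResourceSlicesWithPartionableDevices builds four 10Gi partitions of gpu-0 (plus the unpartitioned gpu-0 device itself), all drawing on a single 40Gi "memory" counter set, and testPodsWithCustomClaims allocates gpu-0-partition-0 and gpu-0-partition-1. A quick arithmetic check, not part of the patch:

package main

import "fmt"

func main() {
	// 4 partitions x 10Gi backed by one shared 40Gi "memory" counter,
	// with 2 partitions claimed by the test pod.
	totalGi := 10.0 * 4
	allocatedGi := 10.0 * 2
	fmt.Println(allocatedGi / totalGi) // 0.5, the wantUtilization for fooDriver/pool1
}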
