Commit ded9604

scheduler_perf + DRA: load up cluster by allocating claims
Having to schedule 4999 pods to simulate a "full" cluster is slow. Creating claims and then allocating them more or less as the scheduler would when scheduling pods is much faster and in practice has the same effect on the dynamicresources plugin, because the plugin looks at claims, not pods. This allows defining the "steady state" workloads with a higher number of devices ("claimsPerNode") again; that was prohibitively slow before.
1 parent: 385599f
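
To make the approach concrete, here is a condensed sketch of the loop at the heart of the new allocResourceClaims op, boiled down from the dra.go changes in this commit (informer/lister setup, the labeled continue, and the failure path are omitted). It allocates each existing claim through the structured allocator library, the same code path the dynamicresources plugin uses, without creating or scheduling any pods:

    // Condensed from allocResourceClaimsOp.run below; variables such as
    // claims, nodes, claimCache and the listers come from the omitted setup.
    for i := range claims.Items {
        claim := &claims.Items[i]
        if claim.Status.Allocation != nil {
            continue // already allocated
        }
        // A fresh single-claim allocator per claim, backed by the shared listers.
        allocator, err := structured.NewAllocator(tCtx, []*resourceapi.ResourceClaim{claim}, claimLister, classLister, sliceLister)
        tCtx.ExpectNoError(err, "create allocator")
        for _, node := range nodes { // the real code shuffles nodes for each claim
            result, err := allocator.Allocate(tCtx, node)
            tCtx.ExpectNoError(err, "allocate claim")
            if result == nil {
                continue // no suitable device left on this node, try the next one
            }
            // Write the allocation to the claim status and "assume" it in the
            // claim cache so the next iteration sees the reduced capacity
            // without waiting for the informer update.
            claim = claim.DeepCopy()
            claim.Status.Allocation = result[0]
            updated, err := tCtx.Client().ResourceV1alpha3().ResourceClaims(claim.Namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{})
            tCtx.ExpectNoError(err, "update claim status with allocation")
            tCtx.ExpectNoError(claimCache.Assume(updated), "assume claim")
            break
        }
    }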

File tree

3 files changed: +162 -24 lines changed

test/integration/scheduler_perf/config/performance-config.yaml

Lines changed: 45 additions & 24 deletions
@@ -1236,12 +1236,13 @@
       measurePods: 2500
       maxClaimsPerNode: 10
 
-# SteadyStateResourceClaimTemplateStructured uses a ResourceClaimTemplate
-# and dynamically creates ResourceClaim instances for each pod, but never
-# more than 10 at a time. Then it waits for a pod to get scheduled
-# before deleting it and creating another one.
+# SteadyStateResourceClaimTemplateStructured uses a ResourceClaimTemplate and
+# dynamically creates ResourceClaim instances for each pod. It creates ten
+# pods, waits for them to be scheduled, deletes them, and starts again,
+# so the cluster remains at the same level of utilization.
 #
-# The workload determines whether there are other pods in the cluster.
+# The number of already allocated claims can be varied, thus simulating
+# various degrees of pre-existing resource utilization.
 #
 # The driver uses structured parameters.
 - name: SteadyStateClusterResourceClaimTemplateStructured
@@ -1262,12 +1263,11 @@
   - opcode: createAny
     templatePath: config/dra/deviceclass-structured.yaml
   - opcode: createAny
-    templatePath: config/dra/resourceclaimtemplate-structured.yaml
+    templatePath: config/dra/resourceclaim-structured.yaml
+    countParam: $initClaims
     namespace: init
-  - opcode: createPods
+  - opcode: allocResourceClaims
     namespace: init
-    countParam: $initPods
-    podTemplatePath: config/dra/pod-with-claim-template.yaml
   - opcode: createAny
     templatePath: config/dra/resourceclaimtemplate-structured.yaml
     namespace: test
@@ -1286,52 +1286,73 @@
       # taking too long overall.
       nodesWithDRA: 1
       nodesWithoutDRA: 1
-      initPods: 0
+      initClaims: 0
       maxClaimsPerNode: 10
       duration: 2s
   - name: empty_100nodes
     params:
       nodesWithDRA: 100
       nodesWithoutDRA: 0
-      initPods: 0
-      maxClaimsPerNode: 2
+      initClaims: 0
+      maxClaimsPerNode: 10
       duration: 10s
   - name: empty_200nodes
     params:
       nodesWithDRA: 200
       nodesWithoutDRA: 0
-      initPods: 0
-      maxClaimsPerNode: 2
+      initClaims: 0
+      maxClaimsPerNode: 10
       duration: 10s
   - name: empty_500nodes
     params:
       nodesWithDRA: 500
       nodesWithoutDRA: 0
-      initPods: 0
-      maxClaimsPerNode: 2
+      initClaims: 0
+      maxClaimsPerNode: 10
      duration: 10s
-  # In the "full" scenarios, the cluster can accommodate exactly one additional pod.
-  # These are slower because scheduling the initial pods takes time.
+  # In the "half" scenarios, half of the devices are in use.
+  - name: half_100nodes
+    params:
+      nodesWithDRA: 100
+      nodesWithoutDRA: 0
+      initClaims: 500
+      maxClaimsPerNode: 10
+      duration: 10s
+  - name: half_200nodes
+    params:
+      nodesWithDRA: 200
+      nodesWithoutDRA: 0
+      initClaims: 1000
+      maxClaimsPerNode: 10
+      duration: 10s
+  - name: half_500nodes
+    params:
+      nodesWithDRA: 500
+      nodesWithoutDRA: 0
+      initClaims: 2500
+      maxClaimsPerNode: 10
+      duration: 10s
+  # In the "full" scenarios, the cluster can accommodate exactly 10 additional pods.
   - name: full_100nodes
     params:
       nodesWithDRA: 100
       nodesWithoutDRA: 0
-      initPods: 199
-      maxClaimsPerNode: 2
+      initClaims: 990
+      maxClaimsPerNode: 10
       duration: 10s
   - name: full_200nodes
     params:
       nodesWithDRA: 200
       nodesWithoutDRA: 0
-      initPods: 399
-      maxClaimsPerNode: 2
+      initClaims: 1990
+      maxClaimsPerNode: 10
       duration: 10s
   - name: full_500nodes
     params:
       nodesWithDRA: 500
       nodesWithoutDRA: 0
-      initPods: 999
-      maxClaimsPerNode: 2
+      initClaims: 4990
+      maxClaimsPerNode: 10
       duration: 10s
 
 # SchedulingWithResourceClaimTemplate uses ResourceClaims
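
A quick sanity check of the parameters above (the arithmetic is implied by the config rather than spelled out in it): each DRA node publishes maxClaimsPerNode = 10 devices, so for example

    half_500nodes: 500 nodes * 10 devices = 5000 devices, initClaims = 2500 = 5000 / 2
    full_500nodes: 5000 devices, initClaims = 4990 = 5000 - 10

which leaves the "full" clusters with capacity for exactly the 10 pods that the steady-state workload keeps creating and deleting.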

test/integration/scheduler_perf/dra.go

Lines changed: 115 additions & 0 deletions
@@ -19,14 +19,23 @@ package benchmark
 import (
     "context"
     "fmt"
+    "math/rand/v2"
     "path/filepath"
+    "reflect"
     "sync"
 
+    "github.com/stretchr/testify/require"
+
+    v1 "k8s.io/api/core/v1"
     resourceapi "k8s.io/api/resource/v1alpha3"
     "k8s.io/apimachinery/pkg/api/resource"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/apimachinery/pkg/labels"
+    "k8s.io/client-go/informers"
     "k8s.io/client-go/util/workqueue"
+    "k8s.io/dynamic-resource-allocation/structured"
     "k8s.io/klog/v2"
+    "k8s.io/kubernetes/pkg/scheduler/util/assumecache"
     draapp "k8s.io/kubernetes/test/e2e/dra/test-driver/app"
     "k8s.io/kubernetes/test/utils/ktesting"
     "k8s.io/utils/ptr"
@@ -261,3 +270,109 @@ func resourceSlice(driverName, nodeName string, capacity int) *resourceapi.ResourceSlice {
 
     return slice
 }
+
+// allocResourceClaimsOp defines an op where resource claims with structured
+// parameters get allocated without being associated with a pod.
+type allocResourceClaimsOp struct {
+    // Must be allocResourceClaimsOpcode.
+    Opcode operationCode
+    // Namespace where claims are to be allocated, all namespaces if empty.
+    Namespace string
+}
+
+var _ realOp = &allocResourceClaimsOp{}
+var _ runnableOp = &allocResourceClaimsOp{}
+
+func (op *allocResourceClaimsOp) isValid(allowParameterization bool) error {
+    return nil
+}
+
+func (op *allocResourceClaimsOp) collectsMetrics() bool {
+    return false
+}
+func (op *allocResourceClaimsOp) patchParams(w *workload) (realOp, error) {
+    return op, op.isValid(false)
+}
+
+func (op *allocResourceClaimsOp) requiredNamespaces() []string { return nil }
+
+func (op *allocResourceClaimsOp) run(tCtx ktesting.TContext) {
+    claims, err := tCtx.Client().ResourceV1alpha3().ResourceClaims(op.Namespace).List(tCtx, metav1.ListOptions{})
+    tCtx.ExpectNoError(err, "list claims")
+    tCtx.Logf("allocating %d ResourceClaims", len(claims.Items))
+    tCtx = ktesting.WithCancel(tCtx)
+    defer tCtx.Cancel("allocResourceClaimsOp.run is done")
+
+    // Track cluster state.
+    informerFactory := informers.NewSharedInformerFactory(tCtx.Client(), 0)
+    claimInformer := informerFactory.Resource().V1alpha3().ResourceClaims().Informer()
+    classLister := informerFactory.Resource().V1alpha3().DeviceClasses().Lister()
+    sliceLister := informerFactory.Resource().V1alpha3().ResourceSlices().Lister()
+    nodeLister := informerFactory.Core().V1().Nodes().Lister()
+    claimCache := assumecache.NewAssumeCache(tCtx.Logger(), claimInformer, "ResourceClaim", "", nil)
+    claimLister := claimLister{cache: claimCache}
+    informerFactory.Start(tCtx.Done())
+    defer func() {
+        tCtx.Cancel("allocResourceClaimsOp.run is shutting down")
+        informerFactory.Shutdown()
+    }()
+    syncedInformers := informerFactory.WaitForCacheSync(tCtx.Done())
+    expectSyncedInformers := map[reflect.Type]bool{
+        reflect.TypeOf(&resourceapi.DeviceClass{}):   true,
+        reflect.TypeOf(&resourceapi.ResourceClaim{}): true,
+        reflect.TypeOf(&resourceapi.ResourceSlice{}): true,
+        reflect.TypeOf(&v1.Node{}):                   true,
+    }
+    require.Equal(tCtx, expectSyncedInformers, syncedInformers, "synced informers")
+
+    // The set of nodes is assumed to be fixed at this point.
+    nodes, err := nodeLister.List(labels.Everything())
+    tCtx.ExpectNoError(err, "list nodes")
+
+    // Allocate one claim at a time, picking nodes randomly. Each
+    // allocation is stored immediately, using the claim cache to avoid
+    // having to wait for the actual informer update.
+claims:
+    for i := range claims.Items {
+        claim := &claims.Items[i]
+        if claim.Status.Allocation != nil {
+            continue
+        }
+
+        allocator, err := structured.NewAllocator(tCtx, []*resourceapi.ResourceClaim{claim}, claimLister, classLister, sliceLister)
+        tCtx.ExpectNoError(err, "create allocator")
+
+        rand.Shuffle(len(nodes), func(i, j int) {
+            nodes[i], nodes[j] = nodes[j], nodes[i]
+        })
+        for _, node := range nodes {
+            result, err := allocator.Allocate(tCtx, node)
+            tCtx.ExpectNoError(err, "allocate claim")
+            if result != nil {
+                claim = claim.DeepCopy()
+                claim.Status.Allocation = result[0]
+                claim, err := tCtx.Client().ResourceV1alpha3().ResourceClaims(claim.Namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{})
+                tCtx.ExpectNoError(err, "update claim status with allocation")
+                tCtx.ExpectNoError(claimCache.Assume(claim), "assume claim")
+                continue claims
+            }
+        }
+        tCtx.Fatalf("Could not allocate claim %d out of %d", i, len(claims.Items))
+    }
+}
+
+type claimLister struct {
+    cache *assumecache.AssumeCache
+}
+
+func (c claimLister) ListAllAllocated() ([]*resourceapi.ResourceClaim, error) {
+    objs := c.cache.List(nil)
+    allocatedClaims := make([]*resourceapi.ResourceClaim, 0, len(objs))
+    for _, obj := range objs {
+        claim := obj.(*resourceapi.ResourceClaim)
+        if claim.Status.Allocation != nil {
+            allocatedClaims = append(allocatedClaims, claim)
+        }
+    }
+    return allocatedClaims, nil
+}

test/integration/scheduler_perf/scheduler_perf.go

Lines changed: 2 additions & 0 deletions
@@ -76,6 +76,7 @@ import (
 type operationCode string
 
 const (
+    allocResourceClaimsOpcode operationCode = "allocResourceClaims"
     createAnyOpcode           operationCode = "createAny"
     createNodesOpcode         operationCode = "createNodes"
     createNamespacesOpcode    operationCode = "createNamespaces"
@@ -426,6 +427,7 @@ type op struct {
 // which op we're decoding at runtime.
 func (op *op) UnmarshalJSON(b []byte) error {
     possibleOps := map[operationCode]realOp{
+        allocResourceClaimsOpcode: &allocResourceClaimsOp{},
         createAnyOpcode:           &createAny{},
         createNodesOpcode:         &createNodesOp{},
         createNamespacesOpcode:    &createNamespacesOp{},
