
Commit b1559c6

Merge pull request kubernetes#126807 from pohly/dra-resourceslice-update

DRA scheduler: ResourceSlice update

2 parents: b3c7256 + cf68eb5

7 files changed: +213 −90 lines


pkg/scheduler/eventhandlers.go

Lines changed: 10 additions & 1 deletion
@@ -514,7 +514,7 @@ func addAllEventHandlers(
 			}
 			handlers = append(handlers, handlerRegistration)
 		case framework.PodSchedulingContext:
-			if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
+			if utilfeature.DefaultFeatureGate.Enabled(features.DRAControlPlaneController) {
 				if handlerRegistration, err = informerFactory.Resource().V1alpha3().PodSchedulingContexts().Informer().AddEventHandler(
 					buildEvtResHandler(at, framework.PodSchedulingContext, "PodSchedulingContext"),
 				); err != nil {
@@ -529,6 +529,15 @@ func addAllEventHandlers(
 				)
 				handlers = append(handlers, handlerRegistration)
 			}
+		case framework.ResourceSlice:
+			if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
+				if handlerRegistration, err = informerFactory.Resource().V1alpha3().ResourceSlices().Informer().AddEventHandler(
+					buildEvtResHandler(at, framework.ResourceSlice, "ResourceSlice"),
+				); err != nil {
+					return err
+				}
+				handlers = append(handlers, handlerRegistration)
+			}
 		case framework.DeviceClass:
 			if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
 				if handlerRegistration, err = informerFactory.Resource().V1alpha3().DeviceClasses().Informer().AddEventHandler(
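For context, buildEvtResHandler (defined elsewhere in this file, not shown in this diff) wraps the scheduler's queueing callbacks. A minimal sketch of the client-go pattern such a registration reduces to; the helper name and enqueue function here are illustrative, not the actual implementation:

import "k8s.io/client-go/tools/cache"

// Illustrative helper: register callbacks for ResourceSlice events.
// AddEventHandler returns a registration handle, which addAllEventHandlers
// collects so the handlers can later be waited on and removed.
func addSliceHandler(informer cache.SharedIndexInformer, enqueue func(obj interface{})) (cache.ResourceEventHandlerRegistration, error) {
	return informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    enqueue,                                              // a new slice may add devices
		UpdateFunc: func(oldObj, newObj interface{}) { enqueue(newObj) }, // a changed slice may add devices
		DeleteFunc: enqueue,                                              // a removed slice invalidates assumptions
	})
}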

pkg/scheduler/eventhandlers_test.go

Lines changed: 26 additions & 1 deletion
@@ -216,6 +216,7 @@ func TestAddAllEventHandlers(t *testing.T) {
 		name                   string
 		gvkMap                 map[framework.GVK]framework.ActionType
 		enableDRA              bool
+		enableClassicDRA       bool
 		expectStaticInformers  map[reflect.Type]bool
 		expectDynamicInformers map[schema.GroupVersionResource]bool
 	}{
@@ -234,6 +235,7 @@ func TestAddAllEventHandlers(t *testing.T) {
 			gvkMap: map[framework.GVK]framework.ActionType{
 				framework.PodSchedulingContext: framework.Add,
 				framework.ResourceClaim:        framework.Add,
+				framework.ResourceSlice:        framework.Add,
 				framework.DeviceClass:          framework.Add,
 			},
 			expectStaticInformers: map[reflect.Type]bool{
@@ -244,19 +246,41 @@ func TestAddAllEventHandlers(t *testing.T) {
 			expectDynamicInformers: map[schema.GroupVersionResource]bool{},
 		},
 		{
-			name: "DRA events enabled",
+			name: "some DRA events enabled",
 			gvkMap: map[framework.GVK]framework.ActionType{
 				framework.PodSchedulingContext: framework.Add,
 				framework.ResourceClaim:        framework.Add,
+				framework.ResourceSlice:        framework.Add,
 				framework.DeviceClass:          framework.Add,
 			},
 			enableDRA: true,
+			expectStaticInformers: map[reflect.Type]bool{
+				reflect.TypeOf(&v1.Pod{}):                    true,
+				reflect.TypeOf(&v1.Node{}):                   true,
+				reflect.TypeOf(&v1.Namespace{}):              true,
+				reflect.TypeOf(&resourceapi.ResourceClaim{}): true,
+				reflect.TypeOf(&resourceapi.ResourceSlice{}): true,
+				reflect.TypeOf(&resourceapi.DeviceClass{}):   true,
+			},
+			expectDynamicInformers: map[schema.GroupVersionResource]bool{},
+		},
+		{
+			name: "all DRA events enabled",
+			gvkMap: map[framework.GVK]framework.ActionType{
+				framework.PodSchedulingContext: framework.Add,
+				framework.ResourceClaim:        framework.Add,
+				framework.ResourceSlice:        framework.Add,
+				framework.DeviceClass:          framework.Add,
+			},
+			enableDRA:        true,
+			enableClassicDRA: true,
 			expectStaticInformers: map[reflect.Type]bool{
 				reflect.TypeOf(&v1.Pod{}):                           true,
 				reflect.TypeOf(&v1.Node{}):                          true,
 				reflect.TypeOf(&v1.Namespace{}):                     true,
 				reflect.TypeOf(&resourceapi.PodSchedulingContext{}): true,
 				reflect.TypeOf(&resourceapi.ResourceClaim{}):        true,
+				reflect.TypeOf(&resourceapi.ResourceSlice{}):        true,
 				reflect.TypeOf(&resourceapi.DeviceClass{}):          true,
 			},
 			expectDynamicInformers: map[schema.GroupVersionResource]bool{},
@@ -320,6 +344,7 @@ func TestAddAllEventHandlers(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DynamicResourceAllocation, tt.enableDRA)
+			featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, tt.enableClassicDRA)
 			logger, ctx := ktesting.NewTestContext(t)
 			ctx, cancel := context.WithCancel(ctx)
 			defer cancel()
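The two SetFeatureGateDuringTest calls override the gates only for the duration of the test and restore the previous values automatically on cleanup. A minimal sketch of the pattern; the test name and body are hypothetical:

func TestWithAllDRAGatesEnabled(t *testing.T) {
	// Force both gates on for this test; prior values are restored on cleanup.
	featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DynamicResourceAllocation, true)
	featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, true)

	// ... exercise addAllEventHandlers and assert on the registered informers ...
}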

pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go

Lines changed: 35 additions & 1 deletion
@@ -398,6 +398,8 @@ func (pl *dynamicResources) EventsToRegister(_ context.Context) ([]framework.Clu
 		{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint}},
 		// A pod might be waiting for a class to get created or modified.
 		{Event: framework.ClusterEvent{Resource: framework.DeviceClass, ActionType: framework.Add | framework.Update}},
+		// Adding or updating a ResourceSlice might make a pod schedulable because new resources became available.
+		{Event: framework.ClusterEvent{Resource: framework.ResourceSlice, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterResourceSliceChange},
 	}
 
 	if pl.podSchedulingContextLister != nil {
@@ -445,7 +447,7 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
 		// This is not an unexpected error: we know that
 		// foreachPodResourceClaim only returns errors for "not
 		// schedulable".
-		logger.V(4).Info("pod is not schedulable", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim), "reason", err.Error())
+		logger.V(6).Info("pod is not schedulable after resource claim change", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim), "reason", err.Error())
 		return framework.QueueSkip, nil
 	}
 
@@ -491,6 +493,38 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
 	return framework.Queue, nil
 }
 
+// isSchedulableAfterResourceSliceChange is invoked for add and update slice events reported by
+// an informer. Such changes can make an unschedulable pod schedulable when the pod requests a device
+// and the change adds a suitable device.
+//
+// For the sake of faster execution and avoiding code duplication, isSchedulableAfterResourceSliceChange
+// only checks whether the pod uses claims. All of the more detailed checks are done in the scheduling
+// attempt.
+//
+// The delete slice event will not invoke it, so newObj will never be nil.
+func (pl *dynamicResources) isSchedulableAfterResourceSliceChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
+	_, modifiedSlice, err := schedutil.As[*resourceapi.ResourceSlice](oldObj, newObj)
+	if err != nil {
+		// Shouldn't happen.
+		return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterResourceSliceChange: %w", err)
+	}
+
+	if err := pl.foreachPodResourceClaim(pod, nil); err != nil {
+		// This is not an unexpected error: we know that
+		// foreachPodResourceClaim only returns errors for "not
+		// schedulable".
+		logger.V(6).Info("pod is not schedulable after resource slice change", "pod", klog.KObj(pod), "resourceSlice", klog.KObj(modifiedSlice), "reason", err.Error())
+		return framework.QueueSkip, nil
+	}
+
+	// We could check what got changed in the slice, but right now that's likely to be
+	// about the spec (there's no status yet...).
+	// We could check whether all claims use classic DRA, but that doesn't seem worth it.
+	// Let's assume that changing the slice may make the pod schedulable.
+	logger.V(5).Info("ResourceSlice change might make pod schedulable", "pod", klog.KObj(pod), "resourceSlice", klog.KObj(modifiedSlice))
+	return framework.Queue, nil
+}
+
 // isSchedulableAfterPodSchedulingContextChange is invoked for all
 // PodSchedulingContext events reported by an informer. It checks whether that
 // change made a previously unschedulable pod schedulable (updated) or a new
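The new hint deliberately stops at the cheapest possible check: does the pod reference any resource claims at all? Reduced to a self-contained toy model (all types below are stand-ins for the framework's, not the real API):

package main

import "fmt"

// Stand-ins for framework.QueueingHint and its values.
type QueueingHint string

const (
	Queue     QueueingHint = "Queue"
	QueueSkip QueueingHint = "QueueSkip"
)

// Toy pod: just a name and the claims it references.
type Pod struct {
	Name   string
	Claims []string
}

// hintAfterSliceChange mirrors the short-circuit logic above: a slice
// add/update can only help pods that use claims, so claimless pods are
// skipped and everything else is requeued; the detailed device-fit check
// happens later, during the actual scheduling attempt.
func hintAfterSliceChange(pod Pod) QueueingHint {
	if len(pod.Claims) == 0 {
		return QueueSkip
	}
	return Queue
}

func main() {
	fmt.Println(hintAfterSliceChange(Pod{Name: "web"}))                                    // QueueSkip
	fmt.Println(hintAfterSliceChange(Pod{Name: "gpu-job", Claims: []string{"gpu-claim"}})) // Queue
}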

pkg/scheduler/framework/types.go

Lines changed: 1 addition & 0 deletions
@@ -125,6 +125,7 @@ const (
 	StorageClass         GVK = "storage.k8s.io/StorageClass"
 	PodSchedulingContext GVK = "PodSchedulingContext"
 	ResourceClaim        GVK = "ResourceClaim"
+	ResourceSlice        GVK = "ResourceSlice"
 	DeviceClass          GVK = "DeviceClass"
 
 	// WildCard is a special GVK to match all resources.

staging/src/k8s.io/dynamic-resource-allocation/structured/allocator.go

Lines changed: 1 addition & 1 deletion
@@ -208,7 +208,7 @@ func (a *Allocator) Allocate(ctx context.Context, node *v1.Node) (finalResult []
 		alloc.requestData[requestIndices{claimIndex: claimIndex, requestIndex: requestIndex}] = requestData
 		numDevices += requestData.numDevices
 	}
-	alloc.logger.Info("Checked claim", "claim", klog.KObj(claim), "numDevices", numDevices)
+	alloc.logger.V(6).Info("Checked claim", "claim", klog.KObj(claim), "numDevices", numDevices)
 
 	// Check that we don't end up with too many results.
 	if numDevices > resourceapi.AllocationResultsMaxSize {
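The only change here is log verbosity: logger.Info emits at level 0 (always), while logger.V(6).Info emits only when the component runs with -v=6 or higher, so the per-claim message disappears from default output. A standalone sketch of that gating with klog:

package main

import (
	"flag"

	"k8s.io/klog/v2"
)

func main() {
	klog.InitFlags(nil)
	_ = flag.Set("v", "5") // one below the threshold used in the change above
	flag.Parse()
	defer klog.Flush()

	logger := klog.Background()
	logger.Info("always printed")                      // verbosity 0
	logger.V(6).Info("Checked claim", "numDevices", 2) // suppressed at -v=5
}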
