Skip to content

Commit 4a265fe

Browse files
committed
DRA scheduler: fix queuing hint support
d66f8f9 added that "plugins have to implement a QueueingHint for Pod/Update event if the rejection from them could be resolved by updating unscheduled Pods itself". This applies to DRA because the name of a generated ResourceClaim must be recorded in the pod status before the pod can be scheduled.
1 parent dd4943c commit 4a265fe

File tree

1 file changed

+33
-1
lines changed

1 file changed

+33
-1
lines changed

pkg/scheduler/framework/plugins/dynamicresources/dynamicresources.go

Lines changed: 33 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -399,6 +399,8 @@ func (pl *dynamicResources) EventsToRegister(_ context.Context) ([]framework.Clu
399399
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: nodeActionType}},
400400
// Allocation is tracked in ResourceClaims, so any changes may make the pods schedulable.
401401
{Event: framework.ClusterEvent{Resource: framework.ResourceClaim, ActionType: framework.Add | framework.Update}, QueueingHintFn: pl.isSchedulableAfterClaimChange},
402+
// Adding the ResourceClaim name to the pod status makes pods waiting for their ResourceClaim schedulable.
403+
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.Update}, QueueingHintFn: pl.isSchedulableAfterPodChange},
402404
// A pod might be waiting for a class to get created or modified.
403405
{Event: framework.ClusterEvent{Resource: framework.DeviceClass, ActionType: framework.Add | framework.Update}},
404406
// Adding or updating a ResourceSlice might make a pod schedulable because new resources became available.
@@ -450,7 +452,10 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
450452
// This is not an unexpected error: we know that
451453
// foreachPodResourceClaim only returns errors for "not
452454
// schedulable".
453-
logger.V(6).Info("pod is not schedulable after resource claim change", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim), "reason", err.Error())
455+
if loggerV := logger.V(6); loggerV.Enabled() {
456+
owner := metav1.GetControllerOf(modifiedClaim)
457+
loggerV.Info("pod is not schedulable after resource claim change", "pod", klog.KObj(pod), "claim", klog.KObj(modifiedClaim), "claimOwner", owner, "reason", err.Error())
458+
}
454459
return framework.QueueSkip, nil
455460
}
456461

@@ -496,6 +501,33 @@ func (pl *dynamicResources) isSchedulableAfterClaimChange(logger klog.Logger, po
496501
return framework.Queue, nil
497502
}
498503

504+
// isSchedulableAfterPodChange is invoked for update pod events reported by
505+
// an informer. It checks whether that change adds the ResourceClaim(s) that the
506+
// pod has been waiting for.
507+
func (pl *dynamicResources) isSchedulableAfterPodChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
508+
_, modifiedPod, err := schedutil.As[*v1.Pod](nil, newObj)
509+
if err != nil {
510+
// Shouldn't happen.
511+
return framework.Queue, fmt.Errorf("unexpected object in isSchedulableAfterClaimChange: %w", err)
512+
}
513+
514+
if pod.UID != modifiedPod.UID {
515+
logger.V(7).Info("pod is not schedulable after change in other pod", "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
516+
return framework.QueueSkip, nil
517+
}
518+
519+
if err := pl.foreachPodResourceClaim(modifiedPod, nil); err != nil {
520+
// This is not an unexpected error: we know that
521+
// foreachPodResourceClaim only returns errors for "not
522+
// schedulable".
523+
logger.V(6).Info("pod is not schedulable after being updated", "pod", klog.KObj(pod))
524+
return framework.QueueSkip, nil
525+
}
526+
527+
logger.V(5).Info("pod got updated and is schedulable", "pod", klog.KObj(pod))
528+
return framework.Queue, nil
529+
}
530+
499531
// isSchedulableAfterResourceSliceChange is invoked for add and update slice events reported by
500532
// an informer. Such changes can make an unschedulable pod schedulable when the pod requests a device
501533
// and the change adds a suitable device.

0 commit comments

Comments
 (0)