Skip to content

Commit baf6964

Browse files
committed
fix(scheduler_one): call Done() as soon as possible
1 parent 9682c62 commit baf6964

File tree

3 files changed

+20
-13
lines changed

3 files changed

+20
-13
lines changed

pkg/scheduler/backend/queue/scheduling_queue.go

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -642,20 +642,19 @@ func (p *PriorityQueue) SchedulingCycle() int64 {
642642
// determineSchedulingHintForInFlightPod looks at the unschedulable plugins of the given Pod
643643
// and determines the scheduling hint for this Pod while checking the events that happened during in-flight.
644644
func (p *PriorityQueue) determineSchedulingHintForInFlightPod(logger klog.Logger, pInfo *framework.QueuedPodInfo) queueingStrategy {
645-
events, err := p.activeQ.clusterEventsForPod(logger, pInfo)
646-
if err != nil {
647-
logger.Error(err, "Error getting cluster events for pod", "pod", klog.KObj(pInfo.Pod))
648-
return queueAfterBackoff
649-
}
650-
651-
rejectorPlugins := pInfo.UnschedulablePlugins.Union(pInfo.PendingPlugins)
652-
if len(rejectorPlugins) == 0 {
645+
if len(pInfo.UnschedulablePlugins) == 0 && len(pInfo.PendingPlugins) == 0 {
653646
// No failed plugins are associated with this Pod.
654647
// Meaning something unusual (a temporal failure on kube-apiserver, etc) happened and this Pod gets moved back to the queue.
655648
// In this case, we should retry scheduling it because this Pod may not be retried until the next flush.
656649
return queueAfterBackoff
657650
}
658651

652+
events, err := p.activeQ.clusterEventsForPod(logger, pInfo)
653+
if err != nil {
654+
logger.Error(err, "Error getting cluster events for pod", "pod", klog.KObj(pInfo.Pod))
655+
return queueAfterBackoff
656+
}
657+
659658
// check if there is an event that makes this Pod schedulable based on pInfo.UnschedulablePlugins.
660659
queueingStrategy := queueSkip
661660
for _, e := range events {

pkg/scheduler/framework/types.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,12 @@ type QueuedPodInfo struct {
239239
// It shouldn't be updated once initialized. It's used to record the e2e scheduling
240240
// latency for a pod.
241241
InitialAttemptTimestamp *time.Time
242-
// UnschedulablePlugins records the plugin names that the Pod failed with Unschedulable or UnschedulableAndUnresolvable status.
243-
// It's registered only when the Pod is rejected in PreFilter, Filter, Reserve, PreBind or Permit (WaitOnPermit).
242+
// UnschedulablePlugins records the plugin names that the Pod failed with Unschedulable or UnschedulableAndUnresolvable status
243+
// at specific extension points: PreFilter, Filter, Reserve, Permit (WaitOnPermit), or PreBind.
244+
// If Pods are rejected at other extension points,
245+
// they're assumed to be unexpected errors (e.g., temporal network issue, plugin implementation issue, etc)
246+
// and retried soon after a backoff period.
247+
// That is because such failures could be solved regardless of incoming cluster events (registered in EventsToRegister).
244248
UnschedulablePlugins sets.Set[string]
245249
// PendingPlugins records the plugin names that the Pod failed with Pending status.
246250
PendingPlugins sets.Set[string]

pkg/scheduler/schedule_one.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,6 @@ func (sched *Scheduler) ScheduleOne(ctx context.Context) {
126126
sched.handleBindingCycleError(bindingCycleCtx, state, fwk, assumedPodInfo, start, scheduleResult, status)
127127
return
128128
}
129-
// Usually, DonePod is called inside the scheduling queue,
130-
// but in this case, we need to call it here because this Pod won't go back to the scheduling queue.
131-
sched.SchedulingQueue.Done(assumedPodInfo.Pod.UID)
132129
}()
133130
}
134131

@@ -309,6 +306,13 @@ func (sched *Scheduler) bindingCycle(
309306
return status
310307
}
311308

309+
// Any failures after this point cannot lead to the Pod being considered unschedulable.
310+
// We define the Pod as "unschedulable" only when Pods are rejected at specific extension points, and PreBind is the last one in the scheduling/binding cycle.
311+
//
312+
// We can call Done() here because
313+
// we can free the cluster events stored in the scheduling queue sonner, which is worth for busy clusters memory consumption wise.
314+
sched.SchedulingQueue.Done(assumedPod.UID)
315+
312316
// Run "bind" plugins.
313317
if status := sched.bind(ctx, fwk, assumedPod, scheduleResult.SuggestedHost, state); !status.IsSuccess() {
314318
return status

0 commit comments

Comments
 (0)