Skip to content

Commit 59051eb

Browse files
authored
Merge pull request kubernetes#126029 from sanposhiho/backoff-preenqueue
scheduler: impose a backoff penalty on gated Pods
2 parents e74205b + b5a1569 commit 59051eb

File tree

3 files changed

+52
-26
lines changed

3 files changed

+52
-26
lines changed

pkg/scheduler/backend/queue/scheduling_queue.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -627,9 +627,6 @@ func (p *PriorityQueue) activate(logger klog.Logger, pod *v1.Pod) bool {
627627
// isPodBackingoff returns true if a pod is still waiting for its backoff timer.
628628
// If this returns true, the pod should not be re-tried.
629629
func (p *PriorityQueue) isPodBackingoff(podInfo *framework.QueuedPodInfo) bool {
630-
if podInfo.Gated {
631-
return false
632-
}
633630
boTime := p.getBackoffTime(podInfo)
634631
return boTime.After(p.clock.Now())
635632
}
@@ -1239,6 +1236,12 @@ func (p *PriorityQueue) getBackoffTime(podInfo *framework.QueuedPodInfo) time.Ti
12391236
// calculateBackoffDuration is a helper function for calculating the backoffDuration
12401237
// based on the number of attempts the pod has made.
12411238
func (p *PriorityQueue) calculateBackoffDuration(podInfo *framework.QueuedPodInfo) time.Duration {
1239+
if podInfo.Attempts == 0 {
1240+
// When the Pod hasn't experienced any scheduling attempts,
1241+
// they aren't obliged to get a backoff penalty at all.
1242+
return 0
1243+
}
1244+
12421245
duration := p.podInitialBackoffDuration
12431246
for i := 1; i < podInfo.Attempts; i++ {
12441247
// Use subtraction instead of addition or multiplication to avoid overflow.

pkg/scheduler/backend/queue/scheduling_queue_test.go

Lines changed: 45 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -697,7 +697,7 @@ func Test_InFlightPods(t *testing.T) {
697697
case action.eventHappens != nil:
698698
q.MoveAllToActiveOrBackoffQueue(logger, *action.eventHappens, nil, nil, nil)
699699
case action.podEnqueued != nil:
700-
err := q.AddUnschedulableIfNotPresent(logger, action.podEnqueued, q.SchedulingCycle())
700+
err := q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(action.podEnqueued), q.SchedulingCycle())
701701
if err != nil {
702702
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
703703
}
@@ -1028,7 +1028,7 @@ func TestPriorityQueue_Update(t *testing.T) {
10281028
name: "when updating a pod which is in unschedulable queue and is backing off, it will be moved to backoff queue",
10291029
wantQ: backoffQ,
10301030
prepareFunc: func(t *testing.T, logger klog.Logger, q *PriorityQueue) (oldPod, newPod *v1.Pod) {
1031-
q.unschedulablePods.addOrUpdate(q.newQueuedPodInfo(medPriorityPodInfo.Pod, queuePlugin))
1031+
q.unschedulablePods.addOrUpdate(attemptQueuedPodInfo(q.newQueuedPodInfo(medPriorityPodInfo.Pod, queuePlugin)))
10321032
updatedPod := medPriorityPodInfo.Pod.DeepCopy()
10331033
updatedPod.Annotations["foo"] = "test"
10341034
return medPriorityPodInfo.Pod, updatedPod
@@ -1039,7 +1039,7 @@ func TestPriorityQueue_Update(t *testing.T) {
10391039
name: "when updating a pod which is in unschedulable queue and is not backing off, it will be moved to active queue",
10401040
wantQ: activeQ,
10411041
prepareFunc: func(t *testing.T, logger klog.Logger, q *PriorityQueue) (oldPod, newPod *v1.Pod) {
1042-
q.unschedulablePods.addOrUpdate(q.newQueuedPodInfo(medPriorityPodInfo.Pod, queuePlugin))
1042+
q.unschedulablePods.addOrUpdate(attemptQueuedPodInfo(q.newQueuedPodInfo(medPriorityPodInfo.Pod, queuePlugin)))
10431043
updatedPod := medPriorityPodInfo.Pod.DeepCopy()
10441044
updatedPod.Annotations["foo"] = "test1"
10451045
// Move clock by podInitialBackoffDuration, so that pods in the unschedulablePods would pass the backing off,
@@ -1175,7 +1175,7 @@ func TestPriorityQueue_UpdateWhenInflight(t *testing.T) {
11751175
// test-pod got rejected by fakePlugin,
11761176
// but the update event that it just got may change this scheduling result,
11771177
// and hence we should put this pod to activeQ/backoffQ.
1178-
err := q.AddUnschedulableIfNotPresent(logger, newQueuedPodInfoForLookup(updatedPod, "fakePlugin"), q.SchedulingCycle())
1178+
err := q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(newQueuedPodInfoForLookup(updatedPod, "fakePlugin")), q.SchedulingCycle())
11791179
if err != nil {
11801180
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
11811181
}
@@ -1519,7 +1519,7 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueueWithQueueingHint(t *testing.
15191519
},
15201520
{
15211521
name: "Queue queues pod to backoffQ if Pod is backing off",
1522-
podInfo: &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), UnschedulablePlugins: sets.New("foo")},
1522+
podInfo: &framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), Attempts: 1, UnschedulablePlugins: sets.New("foo")},
15231523
hint: queueHintReturnQueue,
15241524
expectedQ: backoffQ,
15251525
},
@@ -1550,6 +1550,12 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueueWithQueueingHint(t *testing.
15501550
hint: queueHintReturnQueue,
15511551
expectedQ: activeQ,
15521552
},
1553+
{
1554+
name: "Pod that experienced a scheduling failure before should be queued to backoffQ after un-gated",
1555+
podInfo: setQueuedPodInfoGated(&framework.QueuedPodInfo{PodInfo: mustNewPodInfo(p), Attempts: 1, UnschedulablePlugins: sets.New("foo")}),
1556+
hint: queueHintReturnQueue,
1557+
expectedQ: backoffQ,
1558+
},
15531559
}
15541560

15551561
for _, test := range tests {
@@ -1618,11 +1624,11 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueue(t *testing.T) {
16181624
t.Errorf("Expected: %v after Pop, but got: %v", highPriorityPodInfo.Pod.Name, p.Pod.Name)
16191625
}
16201626
expectInFlightPods(t, q, unschedulablePodInfo.Pod.UID, highPriorityPodInfo.Pod.UID)
1621-
err := q.AddUnschedulableIfNotPresent(logger, q.newQueuedPodInfo(unschedulablePodInfo.Pod, "fooPlugin"), q.SchedulingCycle())
1627+
err := q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(q.newQueuedPodInfo(unschedulablePodInfo.Pod, "fooPlugin")), q.SchedulingCycle())
16221628
if err != nil {
16231629
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
16241630
}
1625-
err = q.AddUnschedulableIfNotPresent(logger, q.newQueuedPodInfo(highPriorityPodInfo.Pod, "fooPlugin"), q.SchedulingCycle())
1631+
err = q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(q.newQueuedPodInfo(highPriorityPodInfo.Pod, "fooPlugin")), q.SchedulingCycle())
16261632
if err != nil {
16271633
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
16281634
}
@@ -1635,7 +1641,7 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueue(t *testing.T) {
16351641
}
16361642
expectInFlightPods(t, q, hpp1.UID)
16371643
// This Pod will go to backoffQ because no failure plugin is associated with it.
1638-
err = q.AddUnschedulableIfNotPresent(logger, q.newQueuedPodInfo(hpp1), q.SchedulingCycle())
1644+
err = q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(q.newQueuedPodInfo(hpp1)), q.SchedulingCycle())
16391645
if err != nil {
16401646
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
16411647
}
@@ -1648,7 +1654,7 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueue(t *testing.T) {
16481654
}
16491655
expectInFlightPods(t, q, hpp2.UID)
16501656
// This Pod will go to the unschedulable Pod pool.
1651-
err = q.AddUnschedulableIfNotPresent(logger, q.newQueuedPodInfo(hpp2, "barPlugin"), q.SchedulingCycle())
1657+
err = q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(q.newQueuedPodInfo(hpp2, "barPlugin")), q.SchedulingCycle())
16521658
if err != nil {
16531659
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
16541660
}
@@ -1691,9 +1697,9 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueue(t *testing.T) {
16911697
if p, err := q.Pop(logger); err != nil || p.Pod != hpp1 {
16921698
t.Errorf("Expected: %v after Pop, but got: %v", hpp1, p.Pod.Name)
16931699
}
1694-
unschedulableQueuedPodInfo := q.newQueuedPodInfo(unschedulablePodInfo.Pod, "fooPlugin")
1695-
highPriorityQueuedPodInfo := q.newQueuedPodInfo(highPriorityPodInfo.Pod, "fooPlugin")
1696-
hpp1QueuedPodInfo := q.newQueuedPodInfo(hpp1)
1700+
unschedulableQueuedPodInfo := attemptQueuedPodInfo(q.newQueuedPodInfo(unschedulablePodInfo.Pod, "fooPlugin"))
1701+
highPriorityQueuedPodInfo := attemptQueuedPodInfo(q.newQueuedPodInfo(highPriorityPodInfo.Pod, "fooPlugin"))
1702+
hpp1QueuedPodInfo := attemptQueuedPodInfo(q.newQueuedPodInfo(hpp1))
16971703
expectInFlightPods(t, q, medPriorityPodInfo.Pod.UID, unschedulablePodInfo.Pod.UID, highPriorityPodInfo.Pod.UID, hpp1.UID)
16981704
err = q.AddUnschedulableIfNotPresent(logger, unschedulableQueuedPodInfo, q.SchedulingCycle())
16991705
if err != nil {
@@ -1757,25 +1763,25 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueueWithOutQueueingHint(t *testi
17571763
// To simulate the pod is failed in scheduling in the real world, Pop() the pod from activeQ before AddUnschedulableIfNotPresent()s below.
17581764
q.Add(logger, medPriorityPodInfo.Pod)
17591765

1760-
err := q.AddUnschedulableIfNotPresent(logger, q.newQueuedPodInfo(unschedulablePodInfo.Pod, "fooPlugin"), q.SchedulingCycle())
1766+
err := q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(q.newQueuedPodInfo(unschedulablePodInfo.Pod, "fooPlugin")), q.SchedulingCycle())
17611767
if err != nil {
17621768
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
17631769
}
1764-
err = q.AddUnschedulableIfNotPresent(logger, q.newQueuedPodInfo(highPriorityPodInfo.Pod, "fooPlugin"), q.SchedulingCycle())
1770+
err = q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(q.newQueuedPodInfo(highPriorityPodInfo.Pod, "fooPlugin")), q.SchedulingCycle())
17651771
if err != nil {
17661772
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
17671773
}
17681774
// Construct a Pod, but don't associate its scheduler failure to any plugin
17691775
hpp1 := clonePod(highPriorityPodInfo.Pod, "hpp1")
17701776
// This Pod will go to backoffQ because no failure plugin is associated with it.
1771-
err = q.AddUnschedulableIfNotPresent(logger, q.newQueuedPodInfo(hpp1), q.SchedulingCycle())
1777+
err = q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(q.newQueuedPodInfo(hpp1)), q.SchedulingCycle())
17721778
if err != nil {
17731779
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
17741780
}
17751781
// Construct another Pod, and associate its scheduler failure to plugin "barPlugin".
17761782
hpp2 := clonePod(highPriorityPodInfo.Pod, "hpp2")
17771783
// This Pod will go to the unschedulable Pod pool.
1778-
err = q.AddUnschedulableIfNotPresent(logger, q.newQueuedPodInfo(hpp2, "barPlugin"), q.SchedulingCycle())
1784+
err = q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(q.newQueuedPodInfo(hpp2, "barPlugin")), q.SchedulingCycle())
17791785
if err != nil {
17801786
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
17811787
}
@@ -1803,9 +1809,9 @@ func TestPriorityQueue_MoveAllToActiveOrBackoffQueueWithOutQueueingHint(t *testi
18031809
}
18041810
}
18051811

1806-
unschedulableQueuedPodInfo := q.newQueuedPodInfo(unschedulablePodInfo.Pod, "fooPlugin")
1807-
highPriorityQueuedPodInfo := q.newQueuedPodInfo(highPriorityPodInfo.Pod, "fooPlugin")
1808-
hpp1QueuedPodInfo := q.newQueuedPodInfo(hpp1)
1812+
unschedulableQueuedPodInfo := attemptQueuedPodInfo(q.newQueuedPodInfo(unschedulablePodInfo.Pod, "fooPlugin"))
1813+
highPriorityQueuedPodInfo := attemptQueuedPodInfo(q.newQueuedPodInfo(highPriorityPodInfo.Pod, "fooPlugin"))
1814+
hpp1QueuedPodInfo := attemptQueuedPodInfo(q.newQueuedPodInfo(hpp1))
18091815
err = q.AddUnschedulableIfNotPresent(logger, unschedulableQueuedPodInfo, q.SchedulingCycle())
18101816
if err != nil {
18111817
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
@@ -2071,11 +2077,11 @@ func TestPriorityQueue_PendingPods(t *testing.T) {
20712077
t.Errorf("Expected: %v after Pop, but got: %v", highPriorityPodInfo.Pod.Name, p.Pod.Name)
20722078
}
20732079
q.Add(logger, medPriorityPodInfo.Pod)
2074-
err := q.AddUnschedulableIfNotPresent(logger, q.newQueuedPodInfo(unschedulablePodInfo.Pod, "plugin"), q.SchedulingCycle())
2080+
err := q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(q.newQueuedPodInfo(unschedulablePodInfo.Pod, "plugin")), q.SchedulingCycle())
20752081
if err != nil {
20762082
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
20772083
}
2078-
err = q.AddUnschedulableIfNotPresent(logger, q.newQueuedPodInfo(highPriorityPodInfo.Pod, "plugin"), q.SchedulingCycle())
2084+
err = q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(q.newQueuedPodInfo(highPriorityPodInfo.Pod, "plugin")), q.SchedulingCycle())
20792085
if err != nil {
20802086
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
20812087
}
@@ -2720,6 +2726,7 @@ var (
27202726
Reason: v1.PodReasonUnschedulable,
27212727
Message: "fake scheduling failure",
27222728
})
2729+
pInfo = attemptQueuedPodInfo(pInfo)
27232730
}
27242731
queue.unschedulablePods.addOrUpdate(pInfo)
27252732
}
@@ -2859,6 +2866,16 @@ func TestPendingPodsMetric(t *testing.T) {
28592866
totalWithDelay := 20
28602867
pInfosWithDelay := makeQueuedPodInfos(totalWithDelay, "z", queueable, timestamp.Add(2*time.Second))
28612868

2869+
resetPodInfos := func() {
2870+
// reset PodInfo's Attempts because they influence the backoff time calculation.
2871+
for i := range pInfos {
2872+
pInfos[i].Attempts = 0
2873+
}
2874+
for i := range pInfosWithDelay {
2875+
pInfosWithDelay[i].Attempts = 0
2876+
}
2877+
}
2878+
28622879
tests := []struct {
28632880
name string
28642881
operations []operation
@@ -3093,6 +3110,7 @@ scheduler_plugin_execution_duration_seconds_count{extension_point="PreEnqueue",p
30933110
for _, test := range tests {
30943111
t.Run(test.name, func(t *testing.T) {
30953112
resetMetrics()
3113+
resetPodInfos()
30963114
logger, ctx := ktesting.NewTestContext(t)
30973115
ctx, cancel := context.WithCancel(ctx)
30983116
defer cancel()
@@ -3465,7 +3483,7 @@ func TestMoveAllToActiveOrBackoffQueue_PreEnqueueChecks(t *testing.T) {
34653483
t.Errorf("Expected: %v after Pop, but got: %v", podInfo.Pod.Name, p.Pod.Name)
34663484
}
34673485
podInfo.UnschedulablePlugins = sets.New("plugin")
3468-
err := q.AddUnschedulableIfNotPresent(logger, podInfo, q.activeQ.schedulingCycle())
3486+
err := q.AddUnschedulableIfNotPresent(logger, attemptQueuedPodInfo(podInfo), q.activeQ.schedulingCycle())
34693487
if err != nil {
34703488
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
34713489
}
@@ -3870,3 +3888,8 @@ func Test_queuedPodInfo_gatedSetUponCreationAndUnsetUponUpdate(t *testing.T) {
38703888
t.Error("Expected pod to be ungated")
38713889
}
38723890
}
3891+
3892+
func attemptQueuedPodInfo(podInfo *framework.QueuedPodInfo) *framework.QueuedPodInfo {
3893+
podInfo.Attempts++
3894+
return podInfo
3895+
}

pkg/scheduler/framework/types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ type QueuedPodInfo struct {
232232
// The time pod added to the scheduling queue.
233233
Timestamp time.Time
234234
// Number of schedule attempts before successfully scheduled.
235-
// It's used to record the # attempts metric.
235+
// It's used to record the # attempts metric and calculate the backoff time this Pod is obliged to get before retrying.
236236
Attempts int
237237
// The time when the pod is added to the queue for the first time. The pod may be added
238238
// back to the queue multiple times before it's successfully scheduled.

0 commit comments

Comments
 (0)