Skip to content

Commit 0446f6c

Browse files
authored
Merge pull request kubernetes#130492 from macsko/call_preenqueue_plugins_when_adding_pod_to_backoffq
Call PreEnqueue plugins before adding pod to backoffQ
2 parents 83d33a9 + 9df0f6b commit 0446f6c

File tree

5 files changed

+225
-42
lines changed

5 files changed

+225
-42
lines changed

pkg/features/kube_features.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,13 @@ const (
632632
// which improves the scheduling latency when the preemption involves in.
633633
SchedulerAsyncPreemption featuregate.Feature = "SchedulerAsyncPreemption"
634634

635+
// owner: @macsko
636+
// kep: http://kep.k8s.io/5142
637+
//
638+
// Improves scheduling queue behavior by popping pods from the backoffQ when the activeQ is empty.
639+
// This allows to process potentially schedulable pods ASAP, eliminating a penalty effect of the backoff queue.
640+
SchedulerPopFromBackoffQ featuregate.Feature = "SchedulerPopFromBackoffQ"
641+
635642
// owner: @atosatto @yuanchen8911
636643
// kep: http://kep.k8s.io/3902
637644
//

pkg/features/versioned_kube_features.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,10 @@ var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate
660660
{Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Alpha},
661661
},
662662

663+
SchedulerPopFromBackoffQ: {
664+
{Version: version.MustParse("1.33"), Default: true, PreRelease: featuregate.Beta},
665+
},
666+
663667
SchedulerQueueingHints: {
664668
{Version: version.MustParse("1.28"), Default: false, PreRelease: featuregate.Beta},
665669
{Version: version.MustParse("1.32"), Default: true, PreRelease: featuregate.Beta},

pkg/scheduler/backend/queue/scheduling_queue.go

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@ type PriorityQueue struct {
189189

190190
// isSchedulingQueueHintEnabled indicates whether the feature gate for the scheduling queue is enabled.
191191
isSchedulingQueueHintEnabled bool
192+
// isPopFromBackoffQEnabled indicates whether the feature gate SchedulerPopFromBackoffQ is enabled.
193+
isPopFromBackoffQEnabled bool
192194
}
193195

194196
// QueueingHintFunction is the wrapper of QueueingHintFn that has PluginName.
@@ -325,6 +327,7 @@ func NewPriorityQueue(
325327
}
326328

327329
isSchedulingQueueHintEnabled := utilfeature.DefaultFeatureGate.Enabled(features.SchedulerQueueingHints)
330+
isPopFromBackoffQEnabled := utilfeature.DefaultFeatureGate.Enabled(features.SchedulerPopFromBackoffQ)
328331

329332
pq := &PriorityQueue{
330333
clock: options.clock,
@@ -339,6 +342,7 @@ func NewPriorityQueue(
339342
pluginMetricsSamplePercent: options.pluginMetricsSamplePercent,
340343
moveRequestCycle: -1,
341344
isSchedulingQueueHintEnabled: isSchedulingQueueHintEnabled,
345+
isPopFromBackoffQEnabled: isPopFromBackoffQEnabled,
342346
}
343347
pq.nsLister = informerFactory.Core().V1().Namespaces().Lister()
344348
pq.nominator = newPodNominator(options.podLister)
@@ -545,13 +549,17 @@ func (p *PriorityQueue) runPreEnqueuePlugin(ctx context.Context, pl framework.Pr
545549
return s
546550
}
547551

548-
// moveToActiveQ tries to add pod to active queue and remove it from unschedulable and backoff queues.
549-
// It returns 2 parameters:
550-
// 1. a boolean flag to indicate whether the pod is added successfully.
551-
// 2. an error for the caller to act on.
552+
// moveToActiveQ tries to add the pod to the active queue.
553+
// If the pod doesn't pass PreEnqueue plugins, it gets added to unschedulablePods instead.
554+
// It returns a boolean flag to indicate whether the pod is added successfully.
552555
func (p *PriorityQueue) moveToActiveQ(logger klog.Logger, pInfo *framework.QueuedPodInfo, event string) bool {
553556
gatedBefore := pInfo.Gated
554-
pInfo.Gated = !p.runPreEnqueuePlugins(context.Background(), pInfo)
557+
// If SchedulerPopFromBackoffQ feature gate is enabled,
558+
// PreEnqueue plugins were called when the pod was added to the backoffQ.
559+
// Don't need to repeat it here when the pod is directly moved from the backoffQ.
560+
if !p.isPopFromBackoffQEnabled || event != framework.BackoffComplete {
561+
pInfo.Gated = !p.runPreEnqueuePlugins(context.Background(), pInfo)
562+
}
555563

556564
added := false
557565
p.activeQ.underLock(func(unlockedActiveQ unlockedActiveQueuer) {
@@ -588,6 +596,28 @@ func (p *PriorityQueue) moveToActiveQ(logger klog.Logger, pInfo *framework.Queue
588596
return added
589597
}
590598

599+
// moveToBackoffQ tries to add the pod to the backoff queue.
600+
// If SchedulerPopFromBackoffQ feature gate is enabled and the pod doesn't pass PreEnqueue plugins, it gets added to unschedulablePods instead.
601+
// It returns a boolean flag to indicate whether the pod is added successfully.
602+
func (p *PriorityQueue) moveToBackoffQ(logger klog.Logger, pInfo *framework.QueuedPodInfo, event string) bool {
603+
// If SchedulerPopFromBackoffQ feature gate is enabled,
604+
// PreEnqueue plugins are called on inserting pods to the backoffQ,
605+
// not to call them again on popping out.
606+
if p.isPopFromBackoffQEnabled {
607+
pInfo.Gated = !p.runPreEnqueuePlugins(context.Background(), pInfo)
608+
if pInfo.Gated {
609+
if p.unschedulablePods.get(pInfo.Pod) == nil {
610+
p.unschedulablePods.addOrUpdate(pInfo, event)
611+
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", event, "queue", unschedulablePods)
612+
}
613+
return false
614+
}
615+
}
616+
p.backoffQ.add(logger, pInfo, event)
617+
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", event, "queue", backoffQ)
618+
return true
619+
}
620+
591621
// Add adds a pod to the active queue. It should be called only when a new pod
592622
// is added so there is no chance the pod is already in active/unschedulable/backoff queues
593623
func (p *PriorityQueue) Add(logger klog.Logger, pod *v1.Pod) {
@@ -724,8 +754,7 @@ func (p *PriorityQueue) addUnschedulableWithoutQueueingHint(logger klog.Logger,
724754
// - No unschedulable plugins are associated with this Pod,
725755
// meaning something unusual (a temporal failure on kube-apiserver, etc) happened and this Pod gets moved back to the queue.
726756
// In this case, we should retry scheduling it because this Pod may not be retried until the next flush.
727-
p.backoffQ.add(logger, pInfo, framework.ScheduleAttemptFailure)
728-
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", framework.ScheduleAttemptFailure, "queue", backoffQ)
757+
_ = p.moveToBackoffQ(logger, pInfo, framework.ScheduleAttemptFailure)
729758
} else {
730759
p.unschedulablePods.addOrUpdate(pInfo, framework.ScheduleAttemptFailure)
731760
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pod), "event", framework.ScheduleAttemptFailure, "queue", unschedulablePods)
@@ -934,13 +963,13 @@ func (p *PriorityQueue) Update(logger klog.Logger, oldPod, newPod *v1.Pod) {
934963
// Pod might have completed its backoff time while being in unschedulablePods,
935964
// so we should check isPodBackingoff before moving the pod to backoffQ.
936965
if p.backoffQ.isPodBackingoff(pInfo) {
937-
p.backoffQ.add(logger, pInfo, framework.EventUnscheduledPodUpdate.Label())
938-
p.unschedulablePods.delete(pInfo.Pod, gated)
939-
logger.V(5).Info("Pod moved to an internal scheduling queue", "pod", klog.KObj(pInfo.Pod), "event", framework.EventUnscheduledPodUpdate.Label(), "queue", backoffQ)
966+
if added := p.moveToBackoffQ(logger, pInfo, framework.EventUnscheduledPodUpdate.Label()); added {
967+
p.unschedulablePods.delete(pInfo.Pod, gated)
968+
}
940969
return
941970
}
942971

943-
if added := p.moveToActiveQ(logger, pInfo, framework.BackoffComplete); added {
972+
if added := p.moveToActiveQ(logger, pInfo, framework.EventUnscheduledPodUpdate.Label()); added {
944973
p.activeQ.broadcast()
945974
}
946975
return
@@ -1044,8 +1073,10 @@ func (p *PriorityQueue) requeuePodViaQueueingHint(logger klog.Logger, pInfo *fra
10441073
// Pod might have completed its backoff time while being in unschedulablePods,
10451074
// so we should check isPodBackingoff before moving the pod to backoffQ.
10461075
if strategy == queueAfterBackoff && p.backoffQ.isPodBackingoff(pInfo) {
1047-
p.backoffQ.add(logger, pInfo, event)
1048-
return backoffQ
1076+
if added := p.moveToBackoffQ(logger, pInfo, event); added {
1077+
return backoffQ
1078+
}
1079+
return unschedulablePods
10491080
}
10501081

10511082
// Reach here if schedulingHint is QueueImmediately, or schedulingHint is Queue but the pod is not backing off.

pkg/scheduler/backend/queue/scheduling_queue_test.go

Lines changed: 164 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,24 +1443,28 @@ func (pl *preEnqueuePlugin) PreEnqueue(ctx context.Context, p *v1.Pod) *framewor
14431443
return framework.NewStatus(framework.UnschedulableAndUnresolvable, "pod name not in allowlists")
14441444
}
14451445

1446-
func TestPriorityQueue_addToActiveQ(t *testing.T) {
1446+
func TestPriorityQueue_moveToActiveQ(t *testing.T) {
14471447
tests := []struct {
1448-
name string
1449-
plugins []framework.PreEnqueuePlugin
1450-
pod *v1.Pod
1451-
wantUnschedulablePods int
1452-
wantSuccess bool
1448+
name string
1449+
plugins []framework.PreEnqueuePlugin
1450+
pod *v1.Pod
1451+
event string
1452+
popFromBackoffQEnabled []bool
1453+
wantUnschedulablePods int
1454+
wantSuccess bool
14531455
}{
14541456
{
14551457
name: "no plugins registered",
14561458
pod: st.MakePod().Name("p").Label("p", "").Obj(),
1459+
event: framework.EventUnscheduledPodAdd.Label(),
14571460
wantUnschedulablePods: 0,
14581461
wantSuccess: true,
14591462
},
14601463
{
14611464
name: "preEnqueue plugin registered, pod name not in allowlists",
14621465
plugins: []framework.PreEnqueuePlugin{&preEnqueuePlugin{}, &preEnqueuePlugin{}},
14631466
pod: st.MakePod().Name("p").Label("p", "").Obj(),
1467+
event: framework.EventUnscheduledPodAdd.Label(),
14641468
wantUnschedulablePods: 1,
14651469
wantSuccess: false,
14661470
},
@@ -1471,47 +1475,178 @@ func TestPriorityQueue_addToActiveQ(t *testing.T) {
14711475
&preEnqueuePlugin{allowlists: []string{"foo"}},
14721476
},
14731477
pod: st.MakePod().Name("bar").Label("bar", "").Obj(),
1478+
event: framework.EventUnscheduledPodAdd.Label(),
14741479
wantUnschedulablePods: 1,
14751480
wantSuccess: false,
14761481
},
1482+
{
1483+
// With SchedulerPopFromBackoffQ enabled, the queue assumes the pod has already passed PreEnqueue,
1484+
// and it doesn't run PreEnqueue again, always puts the pod to activeQ.
1485+
name: "preEnqueue plugin registered, preEnqueue plugin would reject the pod, but isn't run",
1486+
plugins: []framework.PreEnqueuePlugin{
1487+
&preEnqueuePlugin{allowlists: []string{"foo", "bar"}},
1488+
&preEnqueuePlugin{allowlists: []string{"foo"}},
1489+
},
1490+
pod: st.MakePod().Name("bar").Label("bar", "").Obj(),
1491+
event: framework.BackoffComplete,
1492+
popFromBackoffQEnabled: []bool{false},
1493+
wantUnschedulablePods: 1,
1494+
wantSuccess: false,
1495+
},
1496+
{
1497+
name: "preEnqueue plugin registered, pod would fail one preEnqueue plugin, but is after backoff",
1498+
plugins: []framework.PreEnqueuePlugin{
1499+
&preEnqueuePlugin{allowlists: []string{"foo", "bar"}},
1500+
&preEnqueuePlugin{allowlists: []string{"foo"}},
1501+
},
1502+
pod: st.MakePod().Name("bar").Label("bar", "").Obj(),
1503+
event: framework.BackoffComplete,
1504+
popFromBackoffQEnabled: []bool{true},
1505+
wantUnschedulablePods: 0,
1506+
wantSuccess: true,
1507+
},
14771508
{
14781509
name: "preEnqueue plugin registered, pod passed all preEnqueue plugins",
14791510
plugins: []framework.PreEnqueuePlugin{
14801511
&preEnqueuePlugin{allowlists: []string{"foo", "bar"}},
14811512
&preEnqueuePlugin{allowlists: []string{"bar"}},
14821513
},
14831514
pod: st.MakePod().Name("bar").Label("bar", "").Obj(),
1515+
event: framework.EventUnscheduledPodAdd.Label(),
14841516
wantUnschedulablePods: 0,
14851517
wantSuccess: true,
14861518
},
14871519
}
14881520

14891521
for _, tt := range tests {
1490-
t.Run(tt.name, func(t *testing.T) {
1491-
logger, ctx := ktesting.NewTestContext(t)
1492-
ctx, cancel := context.WithCancel(ctx)
1493-
defer cancel()
1522+
if tt.popFromBackoffQEnabled == nil {
1523+
tt.popFromBackoffQEnabled = []bool{true, false}
1524+
}
1525+
for _, popFromBackoffQEnabled := range tt.popFromBackoffQEnabled {
1526+
t.Run(fmt.Sprintf("%s popFromBackoffQEnabled(%v)", tt.name, popFromBackoffQEnabled), func(t *testing.T) {
1527+
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerPopFromBackoffQ, popFromBackoffQEnabled)
1528+
logger, ctx := ktesting.NewTestContext(t)
1529+
ctx, cancel := context.WithCancel(ctx)
1530+
defer cancel()
14941531

1495-
m := map[string][]framework.PreEnqueuePlugin{"": tt.plugins}
1496-
q := NewTestQueueWithObjects(ctx, newDefaultQueueSort(), []runtime.Object{tt.pod}, WithPreEnqueuePluginMap(m),
1497-
WithPodInitialBackoffDuration(time.Second*30), WithPodMaxBackoffDuration(time.Second*60))
1498-
got := q.moveToActiveQ(logger, q.newQueuedPodInfo(tt.pod), framework.EventUnscheduledPodAdd.Label())
1499-
if got != tt.wantSuccess {
1500-
t.Errorf("Unexpected result: want %v, but got %v", tt.wantSuccess, got)
1501-
}
1502-
if tt.wantUnschedulablePods != len(q.unschedulablePods.podInfoMap) {
1503-
t.Errorf("Unexpected unschedulablePods: want %v, but got %v", tt.wantUnschedulablePods, len(q.unschedulablePods.podInfoMap))
1504-
}
1532+
m := map[string][]framework.PreEnqueuePlugin{"": tt.plugins}
1533+
q := NewTestQueueWithObjects(ctx, newDefaultQueueSort(), []runtime.Object{tt.pod}, WithPreEnqueuePluginMap(m),
1534+
WithPodInitialBackoffDuration(time.Second*30), WithPodMaxBackoffDuration(time.Second*60))
1535+
got := q.moveToActiveQ(logger, q.newQueuedPodInfo(tt.pod), tt.event)
1536+
if got != tt.wantSuccess {
1537+
t.Errorf("Unexpected result: want %v, but got %v", tt.wantSuccess, got)
1538+
}
1539+
if tt.wantUnschedulablePods != len(q.unschedulablePods.podInfoMap) {
1540+
t.Errorf("Unexpected unschedulablePods: want %v, but got %v", tt.wantUnschedulablePods, len(q.unschedulablePods.podInfoMap))
1541+
}
15051542

1506-
// Simulate an update event.
1507-
clone := tt.pod.DeepCopy()
1508-
metav1.SetMetaDataAnnotation(&clone.ObjectMeta, "foo", "")
1509-
q.Update(logger, tt.pod, clone)
1510-
// Ensure the pod is still located in unschedulablePods.
1511-
if tt.wantUnschedulablePods != len(q.unschedulablePods.podInfoMap) {
1512-
t.Errorf("Unexpected unschedulablePods: want %v, but got %v", tt.wantUnschedulablePods, len(q.unschedulablePods.podInfoMap))
1513-
}
1514-
})
1543+
// Simulate an update event.
1544+
clone := tt.pod.DeepCopy()
1545+
metav1.SetMetaDataAnnotation(&clone.ObjectMeta, "foo", "")
1546+
q.Update(logger, tt.pod, clone)
1547+
// Ensure the pod is still located in unschedulablePods.
1548+
if tt.wantUnschedulablePods != len(q.unschedulablePods.podInfoMap) {
1549+
t.Errorf("Unexpected unschedulablePods: want %v, but got %v", tt.wantUnschedulablePods, len(q.unschedulablePods.podInfoMap))
1550+
}
1551+
})
1552+
}
1553+
}
1554+
}
1555+
1556+
func TestPriorityQueue_moveToBackoffQ(t *testing.T) {
1557+
tests := []struct {
1558+
name string
1559+
plugins []framework.PreEnqueuePlugin
1560+
pod *v1.Pod
1561+
popFromBackoffQEnabled []bool
1562+
wantSuccess bool
1563+
}{
1564+
{
1565+
name: "no plugins registered",
1566+
pod: st.MakePod().Name("p").Label("p", "").Obj(),
1567+
wantSuccess: true,
1568+
},
1569+
{
1570+
name: "preEnqueue plugin registered, pod name would not be in allowlists",
1571+
plugins: []framework.PreEnqueuePlugin{&preEnqueuePlugin{}, &preEnqueuePlugin{}},
1572+
pod: st.MakePod().Name("p").Label("p", "").Obj(),
1573+
popFromBackoffQEnabled: []bool{false},
1574+
wantSuccess: true,
1575+
},
1576+
{
1577+
name: "preEnqueue plugin registered, pod name not in allowlists",
1578+
plugins: []framework.PreEnqueuePlugin{&preEnqueuePlugin{}, &preEnqueuePlugin{}},
1579+
pod: st.MakePod().Name("p").Label("p", "").Obj(),
1580+
popFromBackoffQEnabled: []bool{true},
1581+
wantSuccess: false,
1582+
},
1583+
{
1584+
name: "preEnqueue plugin registered, preEnqueue plugin would reject the pod, but isn't run",
1585+
plugins: []framework.PreEnqueuePlugin{
1586+
&preEnqueuePlugin{allowlists: []string{"foo", "bar"}},
1587+
&preEnqueuePlugin{allowlists: []string{"foo"}},
1588+
},
1589+
pod: st.MakePod().Name("bar").Label("bar", "").Obj(),
1590+
popFromBackoffQEnabled: []bool{false},
1591+
wantSuccess: true,
1592+
},
1593+
{
1594+
name: "preEnqueue plugin registered, pod failed one preEnqueue plugin",
1595+
plugins: []framework.PreEnqueuePlugin{
1596+
&preEnqueuePlugin{allowlists: []string{"foo", "bar"}},
1597+
&preEnqueuePlugin{allowlists: []string{"foo"}},
1598+
},
1599+
pod: st.MakePod().Name("bar").Label("bar", "").Obj(),
1600+
popFromBackoffQEnabled: []bool{true},
1601+
wantSuccess: false,
1602+
},
1603+
{
1604+
name: "preEnqueue plugin registered, pod passed all preEnqueue plugins",
1605+
plugins: []framework.PreEnqueuePlugin{
1606+
&preEnqueuePlugin{allowlists: []string{"foo", "bar"}},
1607+
&preEnqueuePlugin{allowlists: []string{"bar"}},
1608+
},
1609+
pod: st.MakePod().Name("bar").Label("bar", "").Obj(),
1610+
wantSuccess: true,
1611+
},
1612+
}
1613+
1614+
for _, tt := range tests {
1615+
if tt.popFromBackoffQEnabled == nil {
1616+
tt.popFromBackoffQEnabled = []bool{true, false}
1617+
}
1618+
for _, popFromBackoffQEnabled := range tt.popFromBackoffQEnabled {
1619+
t.Run(fmt.Sprintf("%s popFromBackoffQEnabled(%v)", tt.name, popFromBackoffQEnabled), func(t *testing.T) {
1620+
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SchedulerPopFromBackoffQ, popFromBackoffQEnabled)
1621+
logger, ctx := ktesting.NewTestContext(t)
1622+
ctx, cancel := context.WithCancel(ctx)
1623+
defer cancel()
1624+
1625+
m := map[string][]framework.PreEnqueuePlugin{"": tt.plugins}
1626+
q := NewTestQueueWithObjects(ctx, newDefaultQueueSort(), []runtime.Object{tt.pod}, WithPreEnqueuePluginMap(m),
1627+
WithPodInitialBackoffDuration(time.Second*30), WithPodMaxBackoffDuration(time.Second*60))
1628+
pInfo := q.newQueuedPodInfo(tt.pod)
1629+
got := q.moveToBackoffQ(logger, pInfo, framework.EventUnscheduledPodAdd.Label())
1630+
if got != tt.wantSuccess {
1631+
t.Errorf("Unexpected result: want %v, but got %v", tt.wantSuccess, got)
1632+
}
1633+
if tt.wantSuccess {
1634+
if !q.backoffQ.has(pInfo) {
1635+
t.Errorf("Expected pod to be in backoffQ, but it isn't")
1636+
}
1637+
if q.unschedulablePods.get(pInfo.Pod) != nil {
1638+
t.Errorf("Expected pod not to be in unschedulablePods, but it is")
1639+
}
1640+
} else {
1641+
if q.backoffQ.has(pInfo) {
1642+
t.Errorf("Expected pod not to be in backoffQ, but it is")
1643+
}
1644+
if q.unschedulablePods.get(pInfo.Pod) == nil {
1645+
t.Errorf("Expected pod to be in unschedulablePods, but it isn't")
1646+
}
1647+
}
1648+
})
1649+
}
15151650
}
15161651
}
15171652

test/compatibility_lifecycle/reference/versioned_feature_list.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,12 @@
11491149
lockToDefault: false
11501150
preRelease: Alpha
11511151
version: "1.32"
1152+
- name: SchedulerPopFromBackoffQ
1153+
versionedSpecs:
1154+
- default: true
1155+
lockToDefault: false
1156+
preRelease: Beta
1157+
version: "1.33"
11521158
- name: SchedulerQueueingHints
11531159
versionedSpecs:
11541160
- default: false

0 commit comments

Comments
 (0)