Skip to content

Commit 0e71fac

Browse files
committed
Hold Pod in cache until all other cleanup work is completed
- Move "ForgetPod" after "RunReservePluginsUnreserve", so that the cache would hold the pod to avoid it's being retried simutaneously until Unreserve is completed. - Move "assume" ahead of "RunReservePluginsReserve". This is based on the fact that "ForgetPod" is the last step of failure path, so "assume" should be reversly treated as the first step. The current failure path is like this: assume -> reserve -> unreserve -> forgetPod -> recordingFailure - Make subtests of TestReservePluginUnreserve stateless
1 parent d3edcb7 commit 0e71fac

File tree

2 files changed

+66
-76
lines changed

2 files changed

+66
-76
lines changed

pkg/scheduler/scheduler.go

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -501,16 +501,6 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
501501
// This allows us to keep scheduling without waiting on binding to occur.
502502
assumedPodInfo := podInfo.DeepCopy()
503503
assumedPod := assumedPodInfo.Pod
504-
505-
// Run the Reserve method of reserve plugins.
506-
if sts := prof.RunReservePluginsReserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
507-
sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "")
508-
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
509-
// trigger un-reserve to clean up state associated with the reserved Pod
510-
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
511-
return
512-
}
513-
514504
// assume modifies `assumedPod` by setting NodeName=scheduleResult.SuggestedHost
515505
err = sched.assume(assumedPod, scheduleResult.SuggestedHost)
516506
if err != nil {
@@ -521,7 +511,14 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
521511
// (otherwise this would cause an infinite loop).
522512
sched.recordSchedulingFailure(prof, assumedPodInfo, err, SchedulerError, "")
523513
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
524-
// trigger un-reserve plugins to clean up state associated with the reserved Pod
514+
return
515+
}
516+
517+
// Run the Reserve method of reserve plugins.
518+
if sts := prof.RunReservePluginsReserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
519+
sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "")
520+
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
521+
// trigger un-reserve to clean up state associated with the reserved Pod
525522
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
526523
return
527524
}
@@ -537,11 +534,11 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
537534
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
538535
reason = SchedulerError
539536
}
537+
// One of the plugins returned status different than success or wait.
538+
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
540539
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
541540
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
542541
}
543-
// One of the plugins returned status different than success or wait.
544-
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
545542
sched.recordSchedulingFailure(prof, assumedPodInfo, runPermitStatus.AsError(), reason, "")
546543
return
547544
}
@@ -563,27 +560,25 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
563560
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
564561
reason = SchedulerError
565562
}
563+
// trigger un-reserve plugins to clean up state associated with the reserved Pod
564+
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
566565
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
567566
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
568567
}
569-
// trigger un-reserve plugins to clean up state associated with the reserved Pod
570-
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
571568
sched.recordSchedulingFailure(prof, assumedPodInfo, waitOnPermitStatus.AsError(), reason, "")
572569
return
573570
}
574571

575572
// Run "prebind" plugins.
576573
preBindStatus := prof.RunPreBindPlugins(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
577574
if !preBindStatus.IsSuccess() {
578-
var reason string
579575
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
580-
reason = SchedulerError
576+
// trigger un-reserve plugins to clean up state associated with the reserved Pod
577+
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
581578
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
582579
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
583580
}
584-
// trigger un-reserve plugins to clean up state associated with the reserved Pod
585-
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
586-
sched.recordSchedulingFailure(prof, assumedPodInfo, preBindStatus.AsError(), reason, "")
581+
sched.recordSchedulingFailure(prof, assumedPodInfo, preBindStatus.AsError(), SchedulerError, "")
587582
return
588583
}
589584

test/integration/scheduler/framework_test.go

Lines changed: 51 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -971,57 +971,6 @@ func TestPrebindPlugin(t *testing.T) {
971971
// tests that the order of invocation of Unreserve operation is executed in the
972972
// reverse order of invocation of the Reserve operation.
973973
func TestReservePluginUnreserve(t *testing.T) {
974-
numReservePlugins := 3
975-
pluginInvokeEventChan := make(chan pluginInvokeEvent, numReservePlugins)
976-
977-
preBindPlugin := &PreBindPlugin{
978-
failPreBind: true,
979-
}
980-
var reservePlugins []*ReservePlugin
981-
for i := 0; i < numReservePlugins; i++ {
982-
reservePlugins = append(reservePlugins, &ReservePlugin{
983-
name: fmt.Sprintf("%s-%d", reservePluginName, i),
984-
pluginInvokeEventChan: pluginInvokeEventChan,
985-
})
986-
}
987-
988-
registry := frameworkruntime.Registry{
989-
// TODO(#92229): test more failure points that would trigger Unreserve in
990-
// reserve plugins than just one pre-bind plugin.
991-
preBindPluginName: newPlugin(preBindPlugin),
992-
}
993-
for _, pl := range reservePlugins {
994-
registry[pl.Name()] = newPlugin(pl)
995-
}
996-
997-
// Setup initial reserve and prebind plugin for testing.
998-
prof := schedulerconfig.KubeSchedulerProfile{
999-
SchedulerName: v1.DefaultSchedulerName,
1000-
Plugins: &schedulerconfig.Plugins{
1001-
Reserve: &schedulerconfig.PluginSet{
1002-
// filled by looping over reservePlugins
1003-
},
1004-
PreBind: &schedulerconfig.PluginSet{
1005-
Enabled: []schedulerconfig.Plugin{
1006-
{
1007-
Name: preBindPluginName,
1008-
},
1009-
},
1010-
},
1011-
},
1012-
}
1013-
for _, pl := range reservePlugins {
1014-
prof.Plugins.Reserve.Enabled = append(prof.Plugins.Reserve.Enabled, schedulerconfig.Plugin{
1015-
Name: pl.Name(),
1016-
})
1017-
}
1018-
1019-
// Create the master and the scheduler with the test plugin set.
1020-
testCtx := initTestSchedulerForFrameworkTest(t, testutils.InitTestMaster(t, "reserve-plugin-unreserve", nil), 2,
1021-
scheduler.WithProfiles(prof),
1022-
scheduler.WithFrameworkOutOfTreeRegistry(registry))
1023-
defer testutils.CleanupTest(t, testCtx)
1024-
1025974
tests := []struct {
1026975
name string
1027976
failReserve bool
@@ -1044,6 +993,57 @@ func TestReservePluginUnreserve(t *testing.T) {
1044993

1045994
for _, test := range tests {
1046995
t.Run(test.name, func(t *testing.T) {
996+
numReservePlugins := 3
997+
pluginInvokeEventChan := make(chan pluginInvokeEvent, numReservePlugins)
998+
999+
preBindPlugin := &PreBindPlugin{
1000+
failPreBind: true,
1001+
}
1002+
var reservePlugins []*ReservePlugin
1003+
for i := 0; i < numReservePlugins; i++ {
1004+
reservePlugins = append(reservePlugins, &ReservePlugin{
1005+
name: fmt.Sprintf("%s-%d", reservePluginName, i),
1006+
pluginInvokeEventChan: pluginInvokeEventChan,
1007+
})
1008+
}
1009+
1010+
registry := frameworkruntime.Registry{
1011+
// TODO(#92229): test more failure points that would trigger Unreserve in
1012+
// reserve plugins than just one pre-bind plugin.
1013+
preBindPluginName: newPlugin(preBindPlugin),
1014+
}
1015+
for _, pl := range reservePlugins {
1016+
registry[pl.Name()] = newPlugin(pl)
1017+
}
1018+
1019+
// Setup initial reserve and prebind plugin for testing.
1020+
prof := schedulerconfig.KubeSchedulerProfile{
1021+
SchedulerName: v1.DefaultSchedulerName,
1022+
Plugins: &schedulerconfig.Plugins{
1023+
Reserve: &schedulerconfig.PluginSet{
1024+
// filled by looping over reservePlugins
1025+
},
1026+
PreBind: &schedulerconfig.PluginSet{
1027+
Enabled: []schedulerconfig.Plugin{
1028+
{
1029+
Name: preBindPluginName,
1030+
},
1031+
},
1032+
},
1033+
},
1034+
}
1035+
for _, pl := range reservePlugins {
1036+
prof.Plugins.Reserve.Enabled = append(prof.Plugins.Reserve.Enabled, schedulerconfig.Plugin{
1037+
Name: pl.Name(),
1038+
})
1039+
}
1040+
1041+
// Create the master and the scheduler with the test plugin set.
1042+
testCtx := initTestSchedulerForFrameworkTest(t, testutils.InitTestMaster(t, "reserve-plugin-unreserve", nil), 2,
1043+
scheduler.WithProfiles(prof),
1044+
scheduler.WithFrameworkOutOfTreeRegistry(registry))
1045+
defer testutils.CleanupTest(t, testCtx)
1046+
10471047
preBindPlugin.failPreBind = test.failPreBind
10481048
if test.failReserve {
10491049
reservePlugins[test.failReserveIndex].failReserve = true
@@ -1080,11 +1080,6 @@ func TestReservePluginUnreserve(t *testing.T) {
10801080
}
10811081
}
10821082
}
1083-
1084-
preBindPlugin.reset()
1085-
for _, pl := range reservePlugins {
1086-
pl.reset()
1087-
}
10881083
testutils.CleanupPods(testCtx.ClientSet, t, []*v1.Pod{pod})
10891084
})
10901085
}

0 commit comments

Comments
 (0)