Skip to content

Commit db28b02

Browse files
authored
Merge pull request kubernetes#93511 from Huang-Wei/flake-reserve-plugin
Hold Pod in cache until all other cleanup work is completed
2 parents 0c642b6 + 0e71fac commit db28b02

File tree

2 files changed

+66
-76
lines changed

2 files changed

+66
-76
lines changed

pkg/scheduler/scheduler.go

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -501,16 +501,6 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
501501
// This allows us to keep scheduling without waiting on binding to occur.
502502
assumedPodInfo := podInfo.DeepCopy()
503503
assumedPod := assumedPodInfo.Pod
504-
505-
// Run the Reserve method of reserve plugins.
506-
if sts := prof.RunReservePluginsReserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
507-
sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "")
508-
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
509-
// trigger un-reserve to clean up state associated with the reserved Pod
510-
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
511-
return
512-
}
513-
514504
// assume modifies `assumedPod` by setting NodeName=scheduleResult.SuggestedHost
515505
err = sched.assume(assumedPod, scheduleResult.SuggestedHost)
516506
if err != nil {
@@ -521,7 +511,14 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
521511
// (otherwise this would cause an infinite loop).
522512
sched.recordSchedulingFailure(prof, assumedPodInfo, err, SchedulerError, "")
523513
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
524-
// trigger un-reserve plugins to clean up state associated with the reserved Pod
514+
return
515+
}
516+
517+
// Run the Reserve method of reserve plugins.
518+
if sts := prof.RunReservePluginsReserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
519+
sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "")
520+
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
521+
// trigger un-reserve to clean up state associated with the reserved Pod
525522
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
526523
return
527524
}
@@ -537,11 +534,11 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
537534
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
538535
reason = SchedulerError
539536
}
537+
// One of the plugins returned status different than success or wait.
538+
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
540539
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
541540
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
542541
}
543-
// One of the plugins returned status different than success or wait.
544-
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
545542
sched.recordSchedulingFailure(prof, assumedPodInfo, runPermitStatus.AsError(), reason, "")
546543
return
547544
}
@@ -563,27 +560,25 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
563560
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
564561
reason = SchedulerError
565562
}
563+
// trigger un-reserve plugins to clean up state associated with the reserved Pod
564+
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
566565
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
567566
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
568567
}
569-
// trigger un-reserve plugins to clean up state associated with the reserved Pod
570-
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
571568
sched.recordSchedulingFailure(prof, assumedPodInfo, waitOnPermitStatus.AsError(), reason, "")
572569
return
573570
}
574571

575572
// Run "prebind" plugins.
576573
preBindStatus := prof.RunPreBindPlugins(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
577574
if !preBindStatus.IsSuccess() {
578-
var reason string
579575
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
580-
reason = SchedulerError
576+
// trigger un-reserve plugins to clean up state associated with the reserved Pod
577+
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
581578
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
582579
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
583580
}
584-
// trigger un-reserve plugins to clean up state associated with the reserved Pod
585-
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
586-
sched.recordSchedulingFailure(prof, assumedPodInfo, preBindStatus.AsError(), reason, "")
581+
sched.recordSchedulingFailure(prof, assumedPodInfo, preBindStatus.AsError(), SchedulerError, "")
587582
return
588583
}
589584

test/integration/scheduler/framework_test.go

Lines changed: 51 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -971,57 +971,6 @@ func TestPrebindPlugin(t *testing.T) {
971971
// tests that the order of invocation of Unreserve operation is executed in the
972972
// reverse order of invocation of the Reserve operation.
973973
func TestReservePluginUnreserve(t *testing.T) {
974-
numReservePlugins := 3
975-
pluginInvokeEventChan := make(chan pluginInvokeEvent, numReservePlugins)
976-
977-
preBindPlugin := &PreBindPlugin{
978-
failPreBind: true,
979-
}
980-
var reservePlugins []*ReservePlugin
981-
for i := 0; i < numReservePlugins; i++ {
982-
reservePlugins = append(reservePlugins, &ReservePlugin{
983-
name: fmt.Sprintf("%s-%d", reservePluginName, i),
984-
pluginInvokeEventChan: pluginInvokeEventChan,
985-
})
986-
}
987-
988-
registry := frameworkruntime.Registry{
989-
// TODO(#92229): test more failure points that would trigger Unreserve in
990-
// reserve plugins than just one pre-bind plugin.
991-
preBindPluginName: newPlugin(preBindPlugin),
992-
}
993-
for _, pl := range reservePlugins {
994-
registry[pl.Name()] = newPlugin(pl)
995-
}
996-
997-
// Setup initial reserve and prebind plugin for testing.
998-
prof := schedulerconfig.KubeSchedulerProfile{
999-
SchedulerName: v1.DefaultSchedulerName,
1000-
Plugins: &schedulerconfig.Plugins{
1001-
Reserve: &schedulerconfig.PluginSet{
1002-
// filled by looping over reservePlugins
1003-
},
1004-
PreBind: &schedulerconfig.PluginSet{
1005-
Enabled: []schedulerconfig.Plugin{
1006-
{
1007-
Name: preBindPluginName,
1008-
},
1009-
},
1010-
},
1011-
},
1012-
}
1013-
for _, pl := range reservePlugins {
1014-
prof.Plugins.Reserve.Enabled = append(prof.Plugins.Reserve.Enabled, schedulerconfig.Plugin{
1015-
Name: pl.Name(),
1016-
})
1017-
}
1018-
1019-
// Create the master and the scheduler with the test plugin set.
1020-
testCtx := initTestSchedulerForFrameworkTest(t, testutils.InitTestMaster(t, "reserve-plugin-unreserve", nil), 2,
1021-
scheduler.WithProfiles(prof),
1022-
scheduler.WithFrameworkOutOfTreeRegistry(registry))
1023-
defer testutils.CleanupTest(t, testCtx)
1024-
1025974
tests := []struct {
1026975
name string
1027976
failReserve bool
@@ -1044,6 +993,57 @@ func TestReservePluginUnreserve(t *testing.T) {
1044993

1045994
for _, test := range tests {
1046995
t.Run(test.name, func(t *testing.T) {
996+
numReservePlugins := 3
997+
pluginInvokeEventChan := make(chan pluginInvokeEvent, numReservePlugins)
998+
999+
preBindPlugin := &PreBindPlugin{
1000+
failPreBind: true,
1001+
}
1002+
var reservePlugins []*ReservePlugin
1003+
for i := 0; i < numReservePlugins; i++ {
1004+
reservePlugins = append(reservePlugins, &ReservePlugin{
1005+
name: fmt.Sprintf("%s-%d", reservePluginName, i),
1006+
pluginInvokeEventChan: pluginInvokeEventChan,
1007+
})
1008+
}
1009+
1010+
registry := frameworkruntime.Registry{
1011+
// TODO(#92229): test more failure points that would trigger Unreserve in
1012+
// reserve plugins than just one pre-bind plugin.
1013+
preBindPluginName: newPlugin(preBindPlugin),
1014+
}
1015+
for _, pl := range reservePlugins {
1016+
registry[pl.Name()] = newPlugin(pl)
1017+
}
1018+
1019+
// Setup initial reserve and prebind plugin for testing.
1020+
prof := schedulerconfig.KubeSchedulerProfile{
1021+
SchedulerName: v1.DefaultSchedulerName,
1022+
Plugins: &schedulerconfig.Plugins{
1023+
Reserve: &schedulerconfig.PluginSet{
1024+
// filled by looping over reservePlugins
1025+
},
1026+
PreBind: &schedulerconfig.PluginSet{
1027+
Enabled: []schedulerconfig.Plugin{
1028+
{
1029+
Name: preBindPluginName,
1030+
},
1031+
},
1032+
},
1033+
},
1034+
}
1035+
for _, pl := range reservePlugins {
1036+
prof.Plugins.Reserve.Enabled = append(prof.Plugins.Reserve.Enabled, schedulerconfig.Plugin{
1037+
Name: pl.Name(),
1038+
})
1039+
}
1040+
1041+
// Create the master and the scheduler with the test plugin set.
1042+
testCtx := initTestSchedulerForFrameworkTest(t, testutils.InitTestMaster(t, "reserve-plugin-unreserve", nil), 2,
1043+
scheduler.WithProfiles(prof),
1044+
scheduler.WithFrameworkOutOfTreeRegistry(registry))
1045+
defer testutils.CleanupTest(t, testCtx)
1046+
10471047
preBindPlugin.failPreBind = test.failPreBind
10481048
if test.failReserve {
10491049
reservePlugins[test.failReserveIndex].failReserve = true
@@ -1080,11 +1080,6 @@ func TestReservePluginUnreserve(t *testing.T) {
10801080
}
10811081
}
10821082
}
1083-
1084-
preBindPlugin.reset()
1085-
for _, pl := range reservePlugins {
1086-
pl.reset()
1087-
}
10881083
testutils.CleanupPods(testCtx.ClientSet, t, []*v1.Pod{pod})
10891084
})
10901085
}

0 commit comments

Comments
 (0)