Skip to content

Commit 4fc5c1e

Browse files
authored
Merge pull request kubernetes#92391 from adtac/adtac/reserve-failure
scheduler: run Unreserve if Reserve fails
2 parents ad29e16 + 1b223b8 commit 4fc5c1e

File tree

4 files changed

+26
-10
lines changed

4 files changed

+26
-10
lines changed

pkg/scheduler/framework/runtime/framework.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -788,7 +788,8 @@ func (f *frameworkImpl) runPostBindPlugin(ctx context.Context, pl framework.Post
788788
// RunReservePluginsReserve runs the Reserve method in the set of configured
789789
// reserve plugins. If any of these plugins returns an error, it does not
790790
// continue running the remaining ones and returns the error. In such a case,
791-
// the pod will not be scheduled.
791+
// the pod will not be scheduled and the caller will be expected to call
792+
// RunReservePluginsUnreserve.
792793
func (f *frameworkImpl) RunReservePluginsReserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (status *framework.Status) {
793794
startTime := time.Now()
794795
defer func() {

pkg/scheduler/framework/v1alpha1/interface.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,11 +333,14 @@ type ScorePlugin interface {
333333
type ReservePlugin interface {
334334
Plugin
335335
// Reserve is called by the scheduling framework when the scheduler cache is
336-
// updated.
336+
// updated. If this method returns a failed Status, the scheduler will call
337+
// the Unreserve method for all enabled ReservePlugins.
337338
Reserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status
338339
// Unreserve is called by the scheduling framework when a reserved pod was
339340
// rejected, an error occurred during reservation of subsequent plugins, or
340-
// in a later phase. The Unreserve method implementation must be idempotent.
341+
// in a later phase. The Unreserve method implementation must be idempotent
342+
// and may be called by the scheduler even if the corresponding Reserve
343+
// method for the same plugin was not called.
341344
Unreserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string)
342345
}
343346

pkg/scheduler/scheduler.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,8 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
520520
if sts := prof.RunReservePluginsReserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
521521
sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "")
522522
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
523+
// trigger un-reserve to clean up state associated with the reserved Pod
524+
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
523525
return
524526
}
525527

test/integration/scheduler/framework_test.go

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ func (rp *ReservePlugin) Unreserve(ctx context.Context, state *framework.CycleSt
269269
func (rp *ReservePlugin) reset() {
270270
rp.numReserveCalled = 0
271271
rp.numUnreserveCalled = 0
272+
rp.failReserve = false
272273
}
273274

274275
// Name returns name of the plugin.
@@ -1006,30 +1007,39 @@ func TestReservePluginUnreserve(t *testing.T) {
10061007
defer testutils.CleanupTest(t, testCtx)
10071008

10081009
tests := []struct {
1009-
name string
1010-
preBindFail bool
1010+
name string
1011+
failReserve bool
1012+
failReserveIndex int
1013+
failPreBind bool
10111014
}{
1015+
{
1016+
name: "fail reserve",
1017+
failReserve: true,
1018+
failReserveIndex: 1,
1019+
},
10121020
{
10131021
name: "fail preBind",
1014-
preBindFail: true,
1022+
failPreBind: true,
10151023
},
10161024
{
1017-
name: "pass preBind",
1018-
preBindFail: false,
1025+
name: "pass everything",
10191026
},
10201027
}
10211028

10221029
for _, test := range tests {
10231030
t.Run(test.name, func(t *testing.T) {
1024-
preBindPlugin.failPreBind = test.preBindFail
1031+
preBindPlugin.failPreBind = test.failPreBind
1032+
if test.failReserve {
1033+
reservePlugins[test.failReserveIndex].failReserve = true
1034+
}
10251035
// Create a best effort pod.
10261036
pod, err := createPausePod(testCtx.ClientSet,
10271037
initPausePod(&pausePodConfig{Name: "test-pod", Namespace: testCtx.NS.Name}))
10281038
if err != nil {
10291039
t.Errorf("Error while creating a test pod: %v", err)
10301040
}
10311041

1032-
if test.preBindFail {
1042+
if test.failPreBind || test.failReserve {
10331043
if err = wait.Poll(10*time.Millisecond, 30*time.Second, podSchedulingError(testCtx.ClientSet, pod.Namespace, pod.Name)); err != nil {
10341044
t.Errorf("Expected a scheduling error, but didn't get it: %v", err)
10351045
}

0 commit comments

Comments
 (0)