
Commit c58767b

Merge pull request kubernetes#81876 from wgliang/bugfix/filter-plugins-are-not-been-called-during-preemption
Fix filter plugins not being called during preemption
2 parents: 57df10a + d84a75c

File tree

4 files changed: +145 -57 lines


pkg/scheduler/core/generic_scheduler.go

Lines changed: 31 additions & 26 deletions
@@ -137,7 +137,7 @@ type ScheduleAlgorithm interface {
 	// the pod by preempting lower priority pods if possible.
 	// It returns the node where preemption happened, a list of preempted pods, a
 	// list of pods whose nominated node name should be removed, and error if any.
-	Preempt(*v1.Pod, error) (selectedNode *v1.Node, preemptedPods []*v1.Pod, cleanupNominatedPods []*v1.Pod, err error)
+	Preempt(*framework.PluginContext, *v1.Pod, error) (selectedNode *v1.Node, preemptedPods []*v1.Pod, cleanupNominatedPods []*v1.Pod, err error)
 	// Predicates() returns a pointer to a map of predicate functions. This is
 	// exposed for testing.
 	Predicates() map[string]predicates.FitPredicate
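
For orientation, a minimal sketch of the pattern behind this signature change. The types below are simplified stand-ins, not the real scheduler types: the point is only that the preemption entry point now accepts the per-cycle plugin context, so the same data that filter plugins see during normal scheduling is available while preemption candidates are evaluated.

package main

import "fmt"

// Simplified stand-ins for the scheduler's types; the real
// framework.PluginContext, v1.Pod, and v1.Node carry much more state.
type PluginContext struct{ data map[string]string }
type Pod struct{ Name string }
type Node struct{ Name string }

// Sketch of the updated interface shape: Preempt now threads the
// plugin context through, mirroring the hunk above.
type ScheduleAlgorithm interface {
	Preempt(pc *PluginContext, pod *Pod, scheduleErr error) (*Node, error)
}

type fakeScheduler struct{}

func (fakeScheduler) Preempt(pc *PluginContext, pod *Pod, scheduleErr error) (*Node, error) {
	// A real implementation would pass pc down to the filter plugins
	// while checking candidate nodes; here we only echo the inputs.
	fmt.Printf("preempting for %s after %q (ctx entries: %d)\n", pod.Name, scheduleErr, len(pc.data))
	return &Node{Name: "candidate-node"}, nil
}

func main() {
	var alg ScheduleAlgorithm = fakeScheduler{}
	node, _ := alg.Preempt(&PluginContext{data: map[string]string{}}, &Pod{Name: "p"}, fmt.Errorf("0/3 nodes fit"))
	fmt.Println("preempting on", node.Name)
}
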
@@ -317,7 +317,7 @@ func (g *genericScheduler) selectHost(priorityList schedulerapi.HostPriorityList
 // other pods with the same priority. The nominated pod prevents other pods from
 // using the nominated resources and the nominated pod could take a long time
 // before it is retried after many other pending pods.
-func (g *genericScheduler) Preempt(pod *v1.Pod, scheduleErr error) (*v1.Node, []*v1.Pod, []*v1.Pod, error) {
+func (g *genericScheduler) Preempt(pluginContext *framework.PluginContext, pod *v1.Pod, scheduleErr error) (*v1.Node, []*v1.Pod, []*v1.Pod, error) {
 	// Scheduler may return various types of errors. Consider preemption only if
 	// the error is of type FitError.
 	fitError, ok := scheduleErr.(*FitError)
@@ -342,7 +342,7 @@ func (g *genericScheduler) Preempt(pod *v1.Pod, scheduleErr error) (*v1.Node, []
 	if err != nil {
 		return nil, nil, nil, err
 	}
-	nodeToVictims, err := selectNodesForPreemption(pod, g.nodeInfoSnapshot.NodeInfoMap, potentialNodes, g.predicates,
+	nodeToVictims, err := g.selectNodesForPreemption(pluginContext, pod, g.nodeInfoSnapshot.NodeInfoMap, potentialNodes, g.predicates,
 		g.predicateMetaProducer, g.schedulingQueue, pdbs)
 	if err != nil {
 		return nil, nil, nil, err
@@ -489,7 +489,8 @@ func (g *genericScheduler) findNodesThatFit(pluginContext *framework.PluginConte
 	checkNode := func(i int) {
 		nodeName := g.cache.NodeTree().Next()

-		fits, failedPredicates, err := podFitsOnNode(
+		fits, failedPredicates, status, err := g.podFitsOnNode(
+			pluginContext,
 			pod,
 			meta,
 			g.nodeInfoSnapshot.NodeInfoMap[nodeName],
@@ -504,18 +505,6 @@ func (g *genericScheduler) findNodesThatFit(pluginContext *framework.PluginConte
 			return
 		}
 		if fits {
-			// Iterate each plugin to verify current node
-			status := g.framework.RunFilterPlugins(pluginContext, pod, nodeName)
-			if !status.IsSuccess() {
-				predicateResultLock.Lock()
-				filteredNodesStatuses[nodeName] = status
-				if !status.IsUnschedulable() {
-					errs[status.Message()]++
-				}
-				predicateResultLock.Unlock()
-				return
-			}
-
 			length := atomic.AddInt32(&filteredLen, 1)
 			if length > numNodesToFind {
 				cancel()
@@ -525,7 +514,12 @@ func (g *genericScheduler) findNodesThatFit(pluginContext *framework.PluginConte
 			}
 		} else {
 			predicateResultLock.Lock()
-			failedPredicateMap[nodeName] = failedPredicates
+			if !status.IsSuccess() {
+				filteredNodesStatuses[nodeName] = status
+			}
+			if len(failedPredicates) != 0 {
+				failedPredicateMap[nodeName] = failedPredicates
+			}
 			predicateResultLock.Unlock()
 		}
 	}
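
With the filter-plugin call folded into podFitsOnNode (see the hunks below), a node can now be rejected either by a predicate or by a plugin, so this else-branch records both kinds of failure under the same lock. A rough, self-contained sketch of that bookkeeping pattern; Status here is a simplified stand-in, not the framework type:

package main

import (
	"fmt"
	"sync"
)

// Stand-in for a plugin status; nil or ok==true means success.
type Status struct {
	ok  bool
	msg string
}

func (s *Status) IsSuccess() bool { return s == nil || s.ok }

func main() {
	var (
		lock                  sync.Mutex
		failedPredicateMap    = map[string][]string{} // node -> failed predicate reasons
		filteredNodesStatuses = map[string]*Status{}  // node -> failing plugin status
	)

	// recordFailure mirrors the else-branch above: predicate failures and
	// plugin statuses live in separate maps, guarded by one lock so that
	// concurrent checkNode goroutines can report safely.
	recordFailure := func(node string, failedPredicates []string, status *Status) {
		lock.Lock()
		defer lock.Unlock()
		if !status.IsSuccess() {
			filteredNodesStatuses[node] = status
		}
		if len(failedPredicates) != 0 {
			failedPredicateMap[node] = failedPredicates
		}
	}

	recordFailure("node-a", []string{"PodFitsResources"}, nil)
	recordFailure("node-b", nil, &Status{ok: false, msg: "rejected by a filter plugin"})
	fmt.Println(failedPredicateMap, filteredNodesStatuses["node-b"].msg)
}
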
@@ -613,15 +607,17 @@ func addNominatedPods(pod *v1.Pod, meta predicates.PredicateMetadata,
 // When it is called from Preempt, we should remove the victims of preemption and
 // add the nominated pods. Removal of the victims is done by SelectVictimsOnNode().
 // It removes victims from meta and NodeInfo before calling this function.
-func podFitsOnNode(
+func (g *genericScheduler) podFitsOnNode(
+	pluginContext *framework.PluginContext,
 	pod *v1.Pod,
 	meta predicates.PredicateMetadata,
 	info *schedulernodeinfo.NodeInfo,
 	predicateFuncs map[string]predicates.FitPredicate,
 	queue internalqueue.SchedulingQueue,
 	alwaysCheckAllPredicates bool,
-) (bool, []predicates.PredicateFailureReason, error) {
+) (bool, []predicates.PredicateFailureReason, *framework.Status, error) {
 	var failedPredicates []predicates.PredicateFailureReason
+	var status *framework.Status

 	podsAdded := false
 	// We run predicates twice in some cases. If the node has greater or equal priority
@@ -660,7 +656,7 @@ func podFitsOnNode(
 			if predicate, exist := predicateFuncs[predicateKey]; exist {
 				fit, reasons, err = predicate(pod, metaToUse, nodeInfoToUse)
 				if err != nil {
-					return false, []predicates.PredicateFailureReason{}, err
+					return false, []predicates.PredicateFailureReason{}, nil, err
 				}

 				if !fit {
@@ -676,9 +672,14 @@ func podFitsOnNode(
 				}
 			}
 		}
+
+		status = g.framework.RunFilterPlugins(pluginContext, pod, info.Node().Name)
+		if !status.IsSuccess() && !status.IsUnschedulable() {
+			return false, failedPredicates, status, status.AsError()
+		}
 	}

-	return len(failedPredicates) == 0, failedPredicates, nil
+	return len(failedPredicates) == 0 && status.IsSuccess(), failedPredicates, status, nil
 }

 // PrioritizeNodes prioritizes the nodes by running the individual priority functions in parallel.
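
This hunk is the heart of the fix: the filter plugins now run inside podFitsOnNode itself, so every caller, including the preemption paths changed below, gets plugin verdicts for free. The new return contract is that a node fits only when no predicate failed and the plugin status is success, while an "unschedulable" plugin verdict is reported through the status value rather than as an error. A small sketch of that contract, again with simplified stand-in types rather than the real framework types:

package main

import (
	"errors"
	"fmt"
)

// Stand-in for framework.Status: nil or ok==true is success; an
// unschedulable status is a normal verdict, not an internal error.
type Status struct {
	ok            bool
	unschedulable bool
	msg           string
}

func (s *Status) IsSuccess() bool       { return s == nil || s.ok }
func (s *Status) IsUnschedulable() bool { return s != nil && s.unschedulable }
func (s *Status) AsError() error {
	if s.IsSuccess() {
		return nil
	}
	return errors.New(s.msg)
}

// fitsOnNode mirrors the new contract: predicates run first (their failures
// arrive as failedPredicates), then the plugin status is folded in. Only an
// internal plugin failure is surfaced as an error.
func fitsOnNode(failedPredicates []string, status *Status) (bool, *Status, error) {
	if !status.IsSuccess() && !status.IsUnschedulable() {
		return false, status, status.AsError()
	}
	return len(failedPredicates) == 0 && status.IsSuccess(), status, nil
}

func main() {
	fits, _, err := fitsOnNode(nil, &Status{unschedulable: true, msg: "node is over its pod limit"})
	fmt.Println(fits, err) // false <nil>: unschedulable, but not an error
}
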
@@ -992,7 +993,9 @@ func pickOneNodeForPreemption(nodesToVictims map[*v1.Node]*schedulerapi.Victims)

 // selectNodesForPreemption finds all the nodes with possible victims for
 // preemption in parallel.
-func selectNodesForPreemption(pod *v1.Pod,
+func (g *genericScheduler) selectNodesForPreemption(
+	pluginContext *framework.PluginContext,
+	pod *v1.Pod,
 	nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo,
 	potentialNodes []*v1.Node,
 	fitPredicates map[string]predicates.FitPredicate,
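
As the comment says, selectNodesForPreemption evaluates every potential node concurrently and collects victims for the nodes that work out, as the resultLock usage in the next hunk shows. A rough, self-contained sketch of that fan-out-and-collect pattern; the node and victim types and the checkNode logic are placeholders, not the scheduler's own:

package main

import (
	"fmt"
	"sync"
)

type Victims struct{ Pods []string }

func main() {
	potentialNodes := []string{"node-a", "node-b", "node-c"}
	nodeToVictims := map[string]*Victims{}

	var (
		wg         sync.WaitGroup
		resultLock sync.Mutex
	)

	// Placeholder for selectVictimsOnNode: pretend every node except
	// node-b has a single evictable pod.
	checkNode := func(node string) (*Victims, bool) {
		if node == "node-b" {
			return nil, false
		}
		return &Victims{Pods: []string{"low-priority-pod"}}, true
	}

	for _, node := range potentialNodes {
		wg.Add(1)
		go func(node string) {
			defer wg.Done()
			if victims, fits := checkNode(node); fits {
				resultLock.Lock()
				nodeToVictims[node] = victims
				resultLock.Unlock()
			}
		}(node)
	}
	wg.Wait()
	fmt.Println(len(nodeToVictims), "candidate nodes with victims")
}
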
@@ -1011,7 +1014,7 @@ func selectNodesForPreemption(pod *v1.Pod,
 		if meta != nil {
 			metaCopy = meta.ShallowCopy()
 		}
-		pods, numPDBViolations, fits := selectVictimsOnNode(pod, metaCopy, nodeNameToInfo[nodeName], fitPredicates, queue, pdbs)
+		pods, numPDBViolations, fits := g.selectVictimsOnNode(pluginContext, pod, metaCopy, nodeNameToInfo[nodeName], fitPredicates, queue, pdbs)
 		if fits {
 			resultLock.Lock()
 			victims := schedulerapi.Victims{
@@ -1080,7 +1083,8 @@ func filterPodsWithPDBViolation(pods []interface{}, pdbs []*policy.PodDisruption
 // NOTE: This function assumes that it is never called if "pod" cannot be scheduled
 // due to pod affinity, node affinity, or node anti-affinity reasons. None of
 // these predicates can be satisfied by removing more pods from the node.
-func selectVictimsOnNode(
+func (g *genericScheduler) selectVictimsOnNode(
+	pluginContext *framework.PluginContext,
 	pod *v1.Pod,
 	meta predicates.PredicateMetadata,
 	nodeInfo *schedulernodeinfo.NodeInfo,
@@ -1121,10 +1125,11 @@ func selectVictimsOnNode(
 	// inter-pod affinity to one or more victims, but we have decided not to
 	// support this case for performance reasons. Having affinity to lower
 	// priority pods is not a recommended configuration anyway.
-	if fits, _, err := podFitsOnNode(pod, meta, nodeInfoCopy, fitPredicates, queue, false); !fits {
+	if fits, _, _, err := g.podFitsOnNode(pluginContext, pod, meta, nodeInfoCopy, fitPredicates, queue, false); !fits {
 		if err != nil {
 			klog.Warningf("Encountered error while selecting victims on node %v: %v", nodeInfo.Node().Name, err)
 		}
+
 		return nil, 0, false
 	}
 	var victims []*v1.Pod
@@ -1136,7 +1141,7 @@ func selectVictimsOnNode(
 	violatingVictims, nonViolatingVictims := filterPodsWithPDBViolation(potentialVictims.Items, pdbs)
 	reprievePod := func(p *v1.Pod) bool {
 		addPod(p)
-		fits, _, _ := podFitsOnNode(pod, meta, nodeInfoCopy, fitPredicates, queue, false)
+		fits, _, _, _ := g.podFitsOnNode(pluginContext, pod, meta, nodeInfoCopy, fitPredicates, queue, false)
 		if !fits {
 			removePod(p)
 			victims = append(victims, p)
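
The reprievePod closure in this last hunk implements the "add back as many victims as possible" step: after a minimal set of victims is removed, each candidate victim is tentatively re-added and kept on the node if the preemptor still fits, and that fit check now includes the filter plugins via g.podFitsOnNode. A simplified sketch of the reprieve loop with placeholder types and a fake fits check (not the scheduler's real capacity model):

package main

import "fmt"

type Pod struct {
	Name     string
	Priority int
}

func main() {
	// Candidate victims, assumed sorted by descending priority so the
	// highest-priority pods get the first chance at a reprieve.
	candidates := []Pod{{"high", 100}, {"mid", 50}, {"low", 10}}

	// Fake stand-in for podFitsOnNode: the preemptor still fits as long
	// as at most one candidate stays on the node.
	kept := 0
	stillFits := func() bool { return kept <= 1 }

	var victims []Pod
	reprievePod := func(p Pod) bool {
		kept++ // tentatively add the pod back
		if !stillFits() {
			kept-- // does not fit: the pod remains a victim
			victims = append(victims, p)
			return false
		}
		return true
	}

	for _, p := range candidates {
		reprievePod(p)
	}
	fmt.Println("victims:", victims) // pods that must still be evicted
}
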
