@@ -116,6 +116,10 @@ type Interface interface {
116
116
// Note that both `state` and `nodeInfo` are deep copied.
117
117
SelectVictimsOnNode (ctx context.Context , state * framework.CycleState ,
118
118
pod * v1.Pod , nodeInfo * framework.NodeInfo , pdbs []* policy.PodDisruptionBudget ) ([]* v1.Pod , int , * framework.Status )
119
+ // OrderedScoreFuncs returns a list of ordered score functions used to select the preferable node where victims will be preempted.
120
+ // The ordered score functions will be processed one by one iff we find more than one node with the highest score.
121
+ // Default score functions will be processed if nil is returned here, for backwards compatibility.
122
+ OrderedScoreFuncs (ctx context.Context , nodesToVictims map [string ]* extenderv1.Victims ) []func (node string ) int64
119
123
}
120
124
121
125
type Evaluator struct {
@@ -190,7 +194,7 @@ func (ev *Evaluator) Preempt(ctx context.Context, pod *v1.Pod, m framework.NodeT
190
194
}
191
195
192
196
// 4) Find the best candidate.
193
- bestCandidate := ev .SelectCandidate (logger , candidates )
197
+ bestCandidate := ev .SelectCandidate (ctx , candidates )
194
198
if bestCandidate == nil || len (bestCandidate .Name ()) == 0 {
195
199
return nil , framework .NewStatus (framework .Unschedulable , "no candidate node for preemption" )
196
200
}
@@ -309,7 +313,9 @@ func (ev *Evaluator) callExtenders(logger klog.Logger, pod *v1.Pod, candidates [
309
313
310
314
// SelectCandidate chooses the best-fit candidate from given <candidates> and return it.
311
315
// NOTE: This method is exported for easier testing in default preemption.
312
- func (ev * Evaluator ) SelectCandidate (logger klog.Logger , candidates []Candidate ) Candidate {
316
+ func (ev * Evaluator ) SelectCandidate (ctx context.Context , candidates []Candidate ) Candidate {
317
+ logger := klog .FromContext (ctx )
318
+
313
319
if len (candidates ) == 0 {
314
320
return nil
315
321
}
@@ -318,7 +324,8 @@ func (ev *Evaluator) SelectCandidate(logger klog.Logger, candidates []Candidate)
318
324
}
319
325
320
326
victimsMap := ev .CandidatesToVictimsMap (candidates )
321
- candidateNode := pickOneNodeForPreemption (logger , victimsMap )
327
+ scoreFuncs := ev .OrderedScoreFuncs (ctx , victimsMap )
328
+ candidateNode := pickOneNodeForPreemption (logger , victimsMap , scoreFuncs )
322
329
323
330
// Same as candidatesToVictimsMap, this logic is not applicable for out-of-tree
324
331
// preemption plugins that exercise different candidates on the same nominated node.
@@ -428,8 +435,10 @@ func getPodDisruptionBudgets(pdbLister policylisters.PodDisruptionBudgetLister)
428
435
return nil , nil
429
436
}
430
437
431
- // pickOneNodeForPreemption chooses one node among the given nodes. It assumes
432
- // pods in each map entry are ordered by decreasing priority.
438
+ // pickOneNodeForPreemption chooses one node among the given nodes.
439
+ // It assumes pods in each map entry are ordered by decreasing priority.
440
+ // If scoreFuncs is not empty, it picks a node based on the scores those functions return.
441
+ // If scoreFuncs is empty,
433
442
// It picks a node based on the following criteria:
434
443
// 1. A node with minimum number of PDB violations.
435
444
// 2. A node with minimum highest priority victim is picked.
@@ -439,7 +448,7 @@ func getPodDisruptionBudgets(pdbLister policylisters.PodDisruptionBudgetLister)
439
448
// 6. If there are still ties, the first such node is picked (sort of randomly).
440
449
// The 'minNodes1' and 'minNodes2' are being reused here to save the memory
441
450
// allocation and garbage collection time.
442
- func pickOneNodeForPreemption (logger klog.Logger , nodesToVictims map [string ]* extenderv1.Victims ) string {
451
+ func pickOneNodeForPreemption (logger klog.Logger , nodesToVictims map [string ]* extenderv1.Victims , scoreFuncs [] func ( node string ) int64 ) string {
443
452
if len (nodesToVictims ) == 0 {
444
453
return ""
445
454
}
@@ -449,58 +458,60 @@ func pickOneNodeForPreemption(logger klog.Logger, nodesToVictims map[string]*ext
449
458
allCandidates = append (allCandidates , node )
450
459
}
451
460
452
- minNumPDBViolatingScoreFunc := func (node string ) int64 {
453
- // The smaller the NumPDBViolations, the higher the score.
454
- return - nodesToVictims [node ].NumPDBViolations
455
- }
456
- minHighestPriorityScoreFunc := func (node string ) int64 {
457
- // highestPodPriority is the highest priority among the victims on this node.
458
- highestPodPriority := corev1helpers .PodPriority (nodesToVictims [node ].Pods [0 ])
459
- // The smaller the highestPodPriority, the higher the score.
460
- return - int64 (highestPodPriority )
461
- }
462
- minSumPrioritiesScoreFunc := func (node string ) int64 {
463
- var sumPriorities int64
464
- for _ , pod := range nodesToVictims [node ].Pods {
465
- // We add MaxInt32+1 to all priorities to make all of them >= 0. This is
466
- // needed so that a node with a few pods with negative priority is not
467
- // picked over a node with a smaller number of pods with the same negative
468
- // priority (and similar scenarios).
469
- sumPriorities += int64 (corev1helpers .PodPriority (pod )) + int64 (math .MaxInt32 + 1 )
461
+ if len (scoreFuncs ) == 0 {
462
+ minNumPDBViolatingScoreFunc := func (node string ) int64 {
463
+ // The smaller the NumPDBViolations, the higher the score.
464
+ return - nodesToVictims [node ].NumPDBViolations
465
+ }
466
+ minHighestPriorityScoreFunc := func (node string ) int64 {
467
+ // highestPodPriority is the highest priority among the victims on this node.
468
+ highestPodPriority := corev1helpers .PodPriority (nodesToVictims [node ].Pods [0 ])
469
+ // The smaller the highestPodPriority, the higher the score.
470
+ return - int64 (highestPodPriority )
471
+ }
472
+ minSumPrioritiesScoreFunc := func (node string ) int64 {
473
+ var sumPriorities int64
474
+ for _ , pod := range nodesToVictims [node ].Pods {
475
+ // We add MaxInt32+1 to all priorities to make all of them >= 0. This is
476
+ // needed so that a node with a few pods with negative priority is not
477
+ // picked over a node with a smaller number of pods with the same negative
478
+ // priority (and similar scenarios).
479
+ sumPriorities += int64 (corev1helpers .PodPriority (pod )) + int64 (math .MaxInt32 + 1 )
480
+ }
481
+ // The smaller the sumPriorities, the higher the score.
482
+ return - sumPriorities
470
483
}
471
- // The smaller the sumPriorities, the higher the score.
472
- return - sumPriorities
473
- }
474
- minNumPodsScoreFunc := func (node string ) int64 {
475
- // The smaller the length of pods, the higher the score.
476
- return - int64 (len (nodesToVictims [node ].Pods ))
477
- }
478
- latestStartTimeScoreFunc := func (node string ) int64 {
479
- // Get earliest start time of all pods on the current node.
480
- earliestStartTimeOnNode := util .GetEarliestPodStartTime (nodesToVictims [node ])
481
- if earliestStartTimeOnNode == nil {
482
- logger .Error (errors .New ("earliestStartTime is nil for node" ), "Should not reach here" , "node" , node )
483
- return int64 (math .MinInt64 )
484
+ minNumPodsScoreFunc := func (node string ) int64 {
485
+ // The smaller the length of pods, the higher the score.
486
+ return - int64 (len (nodesToVictims [node ].Pods ))
487
+ }
488
+ latestStartTimeScoreFunc := func (node string ) int64 {
489
+ // Get the earliest start time of all pods on the current node.
490
+ earliestStartTimeOnNode := util .GetEarliestPodStartTime (nodesToVictims [node ])
491
+ if earliestStartTimeOnNode == nil {
492
+ logger .Error (errors .New ("earliestStartTime is nil for node" ), "Should not reach here" , "node" , node )
493
+ return int64 (math .MinInt64 )
494
+ }
495
+ // The bigger the earliestStartTimeOnNode, the higher the score.
496
+ return earliestStartTimeOnNode .UnixNano ()
497
+ }
498
+
499
+ // Each scoreFunc scores the nodes according to specific rules and keeps the name of the node
500
+ // with the highest score. If and only if the scoreFunc has more than one node with the highest
501
+ // score, we will execute the other scoreFunc in order of precedence.
502
+ scoreFuncs = []func (string ) int64 {
503
+ // A node with a minimum number of PDB violations is preferable.
504
+ minNumPDBViolatingScoreFunc ,
505
+ // A node with a minimum highest priority victim is preferable.
506
+ minHighestPriorityScoreFunc ,
507
+ // A node with the smallest sum of priorities is preferable.
508
+ minSumPrioritiesScoreFunc ,
509
+ // A node with the minimum number of pods is preferable.
510
+ minNumPodsScoreFunc ,
511
+ // A node with the latest start time of all highest priority victims is preferable.
512
+ latestStartTimeScoreFunc ,
513
+ // If there are still ties, then the first Node in the list is selected.
484
514
}
485
- // The bigger the earliestStartTimeOnNode, the higher the score.
486
- return earliestStartTimeOnNode .UnixNano ()
487
- }
488
-
489
- // Each scoreFunc scores the nodes according to specific rules and keeps the name of the node
490
- // with the highest score. If and only if the scoreFunc has more than one node with the highest
491
- // score, we will execute the other scoreFunc in order of precedence.
492
- scoreFuncs := []func (string ) int64 {
493
- // A node with a minimum number of PDB is preferable.
494
- minNumPDBViolatingScoreFunc ,
495
- // A node with a minimum highest priority victim is preferable.
496
- minHighestPriorityScoreFunc ,
497
- // A node with the smallest sum of priorities is preferable.
498
- minSumPrioritiesScoreFunc ,
499
- // A node with the minimum number of pods is preferable.
500
- minNumPodsScoreFunc ,
501
- // A node with the latest start time of all highest priority victims is preferable.
502
- latestStartTimeScoreFunc ,
503
- // If there are still ties, then the first Node in the list is selected.
504
515
}
505
516
506
517
for _ , f := range scoreFuncs {
0 commit comments