@@ -33,10 +33,11 @@ import (
 	policy "k8s.io/api/policy/v1beta1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	corelisters "k8s.io/client-go/listers/core/v1"
-	policylisters "k8s.io/client-go/listers/policy/v1beta1"
 	extenderv1 "k8s.io/kube-scheduler/extender/v1"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
+	kubefeatures "k8s.io/kubernetes/pkg/features"
 	framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
 	internalcache "k8s.io/kubernetes/pkg/scheduler/internal/cache"
 	"k8s.io/kubernetes/pkg/scheduler/internal/parallelize"
@@ -100,10 +101,6 @@ func (f *FitError) Error() string {
 // TODO: Rename this type.
 type ScheduleAlgorithm interface {
 	Schedule(context.Context, *profile.Profile, *framework.CycleState, *v1.Pod) (scheduleResult ScheduleResult, err error)
-	// Preempt receives scheduling filter result (NodeToStatusMap) for a pod and tries to create room for
-	// the pod by preempting lower priority pods if possible.
-	// It returns the node where preemption happened, and error if any.
-	Preempt(context.Context, *profile.Profile, *framework.CycleState, *v1.Pod, framework.NodeToStatusMap) (selectedNode string, err error)
 	// Extenders returns a slice of extender config. This is exposed for
 	// testing.
 	Extenders() []framework.Extender
@@ -126,7 +123,6 @@ type genericScheduler struct {
 	extenders                []framework.Extender
 	nodeInfoSnapshot         *internalcache.Snapshot
 	pvcLister                corelisters.PersistentVolumeClaimLister
-	pdbLister                policylisters.PodDisruptionBudgetLister
 	disablePreemption        bool
 	percentageOfNodesToScore int32
 	nextStartNodeIndex       int
@@ -236,7 +232,7 @@ func (g *genericScheduler) selectHost(nodeScoreList framework.NodeScoreList) (st
 	return selected, nil
 }
 
-// preempt finds nodes with pods that can be preempted to make room for "pod" to
+// Preempt finds nodes with pods that can be preempted to make room for "pod" to
 // schedule. It chooses one of the nodes and preempts the pods on the node and
 // returns 1) the node, 2) the list of preempted pods if such a node is found,
 // 3) A list of pods whose nominated node name should be cleared, and 4) any
@@ -248,20 +244,20 @@ func (g *genericScheduler) selectHost(nodeScoreList framework.NodeScoreList) (st
 // other pods with the same priority. The nominated pod prevents other pods from
 // using the nominated resources and the nominated pod could take a long time
 // before it is retried after many other pending pods.
-func (g *genericScheduler) Preempt(ctx context.Context, prof *profile.Profile, state *framework.CycleState, pod *v1.Pod, m framework.NodeToStatusMap) (string, error) {
-	cs := prof.ClientSet()
+func Preempt(ctx context.Context, fh framework.FrameworkHandle, state *framework.CycleState, pod *v1.Pod, m framework.NodeToStatusMap) (string, error) {
+	cs := fh.ClientSet()
 	// TODO(Huang-Wei): get pod from informer cache instead of API server.
 	pod, err := util.GetUpdatedPod(cs, pod)
 	if err != nil {
 		klog.Errorf("Error getting the updated preemptor pod object: %v", err)
 		return "", err
 	}
 
-	if !podEligibleToPreemptOthers(pod, g.nodeInfoSnapshot.NodeInfos()) {
+	if !podEligibleToPreemptOthers(pod, fh.SnapshotSharedLister().NodeInfos()) {
 		klog.V(5).Infof("Pod %v/%v is not eligible for more preemption.", pod.Namespace, pod.Name)
 		return "", nil
 	}
-	allNodes, err := g.nodeInfoSnapshot.NodeInfos().List()
+	allNodes, err := fh.SnapshotSharedLister().NodeInfos().List()
 	if err != nil {
 		return "", err
 	}
@@ -285,22 +281,19 @@ func (g *genericScheduler) Preempt(ctx context.Context, prof *profile.Profile, s
 		}
 		klog.Infof("%v potential nodes for preemption, first %v are: %v", len(potentialNodes), len(sample), sample)
 	}
-	var pdbs []*policy.PodDisruptionBudget
-	if g.pdbLister != nil {
-		pdbs, err = g.pdbLister.List(labels.Everything())
-		if err != nil {
-			return "", err
-		}
+	pdbs, err := getPodDisruptionBudgets(fh)
+	if err != nil {
+		return "", err
 	}
-	nodeNameToVictims, err := selectNodesForPreemption(ctx, prof, g.podNominator, state, pod, potentialNodes, pdbs)
+	nodeNameToVictims, err := selectNodesForPreemption(ctx, fh.PreemptHandle(), fh.PreemptHandle(), state, pod, potentialNodes, pdbs)
 	if err != nil {
 		return "", err
 	}
 
 	// We will only check nodeNameToVictims with extenders that support preemption.
 	// Extenders which do not support preemption may later prevent preemptor from being scheduled on the nominated
 	// node. In that case, scheduler will find a different host for the preemptor in subsequent scheduling cycles.
-	nodeNameToVictims, err = g.processPreemptionWithExtenders(pod, nodeNameToVictims)
+	nodeNameToVictims, err = processPreemptionWithExtenders(fh, pod, nodeNameToVictims)
 	if err != nil {
 		return "", err
 	}
@@ -317,18 +310,18 @@ func (g *genericScheduler) Preempt(ctx context.Context, prof *profile.Profile, s
 			return "", err
 		}
 		// If the victim is a WaitingPod, send a reject message to the PermitPlugin
-		if waitingPod := prof.GetWaitingPod(victim.UID); waitingPod != nil {
+		if waitingPod := fh.GetWaitingPod(victim.UID); waitingPod != nil {
 			waitingPod.Reject("preempted")
 		}
-		prof.Recorder.Eventf(victim, pod, v1.EventTypeNormal, "Preempted", "Preempting", "Preempted by %v/%v on node %v", pod.Namespace, pod.Name, candidateNode)
+		fh.EventRecorder().Eventf(victim, pod, v1.EventTypeNormal, "Preempted", "Preempting", "Preempted by %v/%v on node %v", pod.Namespace, pod.Name, candidateNode)
 	}
 	metrics.PreemptionVictims.Observe(float64(len(victims)))
 
 	// Lower priority pods nominated to run on this node, may no longer fit on
 	// this node. So, we should remove their nomination. Removing their
 	// nomination updates these pods and moves them to the active queue. It
 	// lets scheduler find another place for them.
-	nominatedPods := g.getLowerPriorityNominatedPods(pod, candidateNode)
+	nominatedPods := getLowerPriorityNominatedPods(fh.PreemptHandle(), pod, candidateNode)
 	if err := util.ClearNominatedNodeName(cs, nominatedPods...); err != nil {
 		klog.Errorf("Cannot clear 'NominatedNodeName' field: %v", err)
 		// We do not return as this error is not critical.
@@ -337,18 +330,22 @@ func (g *genericScheduler) Preempt(ctx context.Context, prof *profile.Profile, s
337
330
return candidateNode , nil
338
331
}
339
332
333
+ func getPodDisruptionBudgets (fh framework.FrameworkHandle ) ([]* policy.PodDisruptionBudget , error ) {
334
+ if utilfeature .DefaultFeatureGate .Enabled (kubefeatures .PodDisruptionBudget ) {
335
+ return fh .SharedInformerFactory ().Policy ().V1beta1 ().PodDisruptionBudgets ().Lister ().List (labels .Everything ())
336
+ }
337
+ return nil , nil
338
+ }
339
+
340
340
// processPreemptionWithExtenders processes preemption with extenders
341
- func (g * genericScheduler ) processPreemptionWithExtenders (
342
- pod * v1.Pod ,
343
- nodeNameToVictims map [string ]* extenderv1.Victims ,
344
- ) (map [string ]* extenderv1.Victims , error ) {
341
+ func processPreemptionWithExtenders (fh framework.FrameworkHandle , pod * v1.Pod , nodeNameToVictims map [string ]* extenderv1.Victims ) (map [string ]* extenderv1.Victims , error ) {
345
342
if len (nodeNameToVictims ) > 0 {
346
- for _ , extender := range g . extenders {
343
+ for _ , extender := range fh . PreemptHandle (). Extenders () {
347
344
if extender .SupportsPreemption () && extender .IsInterested (pod ) {
348
345
newNodeNameToVictims , err := extender .ProcessPreemption (
349
346
pod ,
350
347
nodeNameToVictims ,
351
- g . nodeInfoSnapshot .NodeInfos (),
348
+ fh . SnapshotSharedLister () .NodeInfos (),
352
349
)
353
350
if err != nil {
354
351
if extender .IsIgnorable () {
@@ -381,8 +378,8 @@ func (g *genericScheduler) processPreemptionWithExtenders(
 // manipulation of NodeInfo and PreFilter state per nominated pod. It may not be
 // worth the complexity, especially because we generally expect to have a very
 // small number of nominated pods per node.
-func (g *genericScheduler) getLowerPriorityNominatedPods(pod *v1.Pod, nodeName string) []*v1.Pod {
-	pods := g.podNominator.NominatedPodsForNode(nodeName)
+func getLowerPriorityNominatedPods(pn framework.PodNominator, pod *v1.Pod, nodeName string) []*v1.Pod {
+	pods := pn.NominatedPodsForNode(nodeName)
 
 	if len(pods) == 0 {
 		return nil
@@ -1141,7 +1138,6 @@ func NewGenericScheduler(
 	nodeInfoSnapshot *internalcache.Snapshot,
 	extenders []framework.Extender,
 	pvcLister corelisters.PersistentVolumeClaimLister,
-	pdbLister policylisters.PodDisruptionBudgetLister,
 	disablePreemption bool,
 	percentageOfNodesToScore int32) ScheduleAlgorithm {
 	return &genericScheduler{
@@ -1150,7 +1146,6 @@ func NewGenericScheduler(
 		extenders:                extenders,
 		nodeInfoSnapshot:         nodeInfoSnapshot,
 		pvcLister:                pvcLister,
-		pdbLister:                pdbLister,
 		disablePreemption:        disablePreemption,
 		percentageOfNodesToScore: percentageOfNodesToScore,
 	}
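
The sketch below is a minimal, self-contained illustration of the dependency-injection pattern this diff applies: preemption helpers receive what they need (here, a pod nominator) as parameters instead of reading `genericScheduler` fields, which is what lets `Preempt` become a free function driven by a `framework.FrameworkHandle`. The `miniPod`, `PodNominatorLite`, and `mapNominator` types and the priority values are hypothetical stand-ins, not the real scheduler API.

```go
package main

import "fmt"

// miniPod is a hypothetical, stripped-down pod: just a name and a priority.
type miniPod struct {
	Name     string
	Priority int32
}

// PodNominatorLite is a hypothetical stand-in for framework.PodNominator: it
// reports which pods are nominated to run on a given node.
type PodNominatorLite interface {
	NominatedPodsForNode(nodeName string) []miniPod
}

// mapNominator is a trivial in-memory nominator keyed by node name.
type mapNominator map[string][]miniPod

func (m mapNominator) NominatedPodsForNode(node string) []miniPod { return m[node] }

// lowerPriorityNominatedPods mirrors the shape of getLowerPriorityNominatedPods
// in the diff: the nominator is injected as a parameter, and only pods with
// strictly lower priority than the preemptor are returned, since those are the
// nominations the caller would clear.
func lowerPriorityNominatedPods(pn PodNominatorLite, preemptorPriority int32, node string) []miniPod {
	var lower []miniPod
	for _, p := range pn.NominatedPodsForNode(node) {
		if p.Priority < preemptorPriority {
			lower = append(lower, p)
		}
	}
	return lower
}

func main() {
	pn := mapNominator{
		"node-1": {{Name: "low-a", Priority: 10}, {Name: "high-b", Priority: 1000}, {Name: "low-c", Priority: 5}},
	}
	// A preemptor at priority 100 only displaces the lower-priority nominees.
	fmt.Println(lowerPriorityNominatedPods(pn, 100, "node-1"))
}
```

The same shape applies to `getPodDisruptionBudgets` in the diff above: the handle supplies the shared informer factory, and the helper only consults the PDB lister when the PodDisruptionBudget feature gate is enabled, returning nil otherwise.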