@@ -65,6 +65,7 @@ const (
65
65
// The interface follows a pattern similar to cache.FIFO and cache.Heap and
66
66
// makes it easy to use those data structures as a SchedulingQueue.
67
67
type SchedulingQueue interface {
68
+ framework.PodNominator
68
69
Add (pod * v1.Pod ) error
69
70
// AddUnschedulableIfNotPresent adds an unschedulable pod back to scheduling queue.
70
71
// The podSchedulingCycle represents the current scheduling cycle number which can be
@@ -87,11 +88,6 @@ type SchedulingQueue interface {
87
88
// Close closes the SchedulingQueue so that the goroutine which is
88
89
// waiting to pop items can exit gracefully.
89
90
Close ()
90
- // UpdateNominatedPodForNode adds the given pod to the nominated pod map or
91
- // updates it if it already exists.
92
- UpdateNominatedPodForNode (pod * v1.Pod , nodeName string )
93
- // DeleteNominatedPodIfExists deletes nominatedPod from internal cache
94
- DeleteNominatedPodIfExists (pod * v1.Pod )
95
91
// NumUnschedulablePods returns the number of unschedulable pods exist in the SchedulingQueue.
96
92
NumUnschedulablePods () int
97
93
// Run starts the goroutines managing the queue.
@@ -116,6 +112,9 @@ func NominatedNodeName(pod *v1.Pod) string {
116
112
// is called unschedulableQ. The third queue holds pods that are moved from
117
113
// unschedulable queues and will be moved to active queue when backoff are completed.
118
114
type PriorityQueue struct {
115
+ // PodNominator abstracts the operations to maintain nominated Pods.
116
+ framework.PodNominator
117
+
119
118
stop chan struct {}
120
119
clock util.Clock
121
120
@@ -135,9 +134,6 @@ type PriorityQueue struct {
135
134
podBackoffQ * heap.Heap
136
135
// unschedulableQ holds pods that have been tried and determined unschedulable.
137
136
unschedulableQ * UnschedulablePodsMap
138
- // nominatedPods is a structures that stores pods which are nominated to run
139
- // on nodes.
140
- nominatedPods * nominatedPodMap
141
137
// schedulingCycle represents sequence number of scheduling cycle and is incremented
142
138
// when a pod is popped.
143
139
schedulingCycle int64
@@ -156,6 +152,7 @@ type priorityQueueOptions struct {
156
152
clock util.Clock
157
153
podInitialBackoffDuration time.Duration
158
154
podMaxBackoffDuration time.Duration
155
+ podNominator framework.PodNominator
159
156
}
160
157
161
158
// Option configures a PriorityQueue
@@ -168,20 +165,27 @@ func WithClock(clock util.Clock) Option {
168
165
}
169
166
}
170
167
171
- // WithPodInitialBackoffDuration sets pod initial backoff duration for PriorityQueue,
168
+ // WithPodInitialBackoffDuration sets pod initial backoff duration for PriorityQueue.
172
169
func WithPodInitialBackoffDuration (duration time.Duration ) Option {
173
170
return func (o * priorityQueueOptions ) {
174
171
o .podInitialBackoffDuration = duration
175
172
}
176
173
}
177
174
178
- // WithPodMaxBackoffDuration sets pod max backoff duration for PriorityQueue,
175
+ // WithPodMaxBackoffDuration sets pod max backoff duration for PriorityQueue.
179
176
func WithPodMaxBackoffDuration (duration time.Duration ) Option {
180
177
return func (o * priorityQueueOptions ) {
181
178
o .podMaxBackoffDuration = duration
182
179
}
183
180
}
184
181
182
+ // WithPodNominator sets pod nominator for PriorityQueue.
183
+ func WithPodNominator (pn framework.PodNominator ) Option {
184
+ return func (o * priorityQueueOptions ) {
185
+ o .podNominator = pn
186
+ }
187
+ }
188
+
185
189
var defaultPriorityQueueOptions = priorityQueueOptions {
186
190
clock : util.RealClock {},
187
191
podInitialBackoffDuration : DefaultPodInitialBackoffDuration ,
@@ -214,14 +218,18 @@ func NewPriorityQueue(
214
218
return lessFn (pInfo1 , pInfo2 )
215
219
}
216
220
221
+ if options .podNominator == nil {
222
+ options .podNominator = NewPodNominator ()
223
+ }
224
+
217
225
pq := & PriorityQueue {
226
+ PodNominator : options .podNominator ,
218
227
clock : options .clock ,
219
228
stop : make (chan struct {}),
220
229
podInitialBackoffDuration : options .podInitialBackoffDuration ,
221
230
podMaxBackoffDuration : options .podMaxBackoffDuration ,
222
231
activeQ : heap .NewWithRecorder (podInfoKeyFunc , comp , metrics .NewActivePodsRecorder ()),
223
232
unschedulableQ : newUnschedulablePodsMap (metrics .NewUnschedulablePodsRecorder ()),
224
- nominatedPods : newNominatedPodMap (),
225
233
moveRequestCycle : - 1 ,
226
234
}
227
235
pq .cond .L = & pq .lock
@@ -255,7 +263,7 @@ func (p *PriorityQueue) Add(pod *v1.Pod) error {
255
263
klog .Errorf ("Error: pod %v is already in the podBackoff queue." , nsNameForPod (pod ))
256
264
}
257
265
metrics .SchedulerQueueIncomingPods .WithLabelValues ("active" , PodAdd ).Inc ()
258
- p .nominatedPods . add (pod , "" )
266
+ p .PodNominator . AddNominatedPod (pod , "" )
259
267
p .cond .Broadcast ()
260
268
261
269
return nil
@@ -316,9 +324,8 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(pInfo *framework.QueuedPodI
316
324
metrics .SchedulerQueueIncomingPods .WithLabelValues ("unschedulable" , ScheduleAttemptFailure ).Inc ()
317
325
}
318
326
319
- p .nominatedPods . add (pod , "" )
327
+ p .PodNominator . AddNominatedPod (pod , "" )
320
328
return nil
321
-
322
329
}
323
330
324
331
// flushBackoffQCompleted Moves all pods from backoffQ which have completed backoff in to activeQ
@@ -416,14 +423,14 @@ func (p *PriorityQueue) Update(oldPod, newPod *v1.Pod) error {
416
423
oldPodInfo := newQueuedPodInfoNoTimestamp (oldPod )
417
424
// If the pod is already in the active queue, just update it there.
418
425
if oldPodInfo , exists , _ := p .activeQ .Get (oldPodInfo ); exists {
419
- p .nominatedPods . update (oldPod , newPod )
426
+ p .PodNominator . UpdateNominatedPod (oldPod , newPod )
420
427
err := p .activeQ .Update (updatePod (oldPodInfo , newPod ))
421
428
return err
422
429
}
423
430
424
431
// If the pod is in the backoff queue, update it there.
425
432
if oldPodInfo , exists , _ := p .podBackoffQ .Get (oldPodInfo ); exists {
426
- p .nominatedPods . update (oldPod , newPod )
433
+ p .PodNominator . UpdateNominatedPod (oldPod , newPod )
427
434
p .podBackoffQ .Delete (oldPodInfo )
428
435
err := p .activeQ .Add (updatePod (oldPodInfo , newPod ))
429
436
if err == nil {
@@ -435,7 +442,7 @@ func (p *PriorityQueue) Update(oldPod, newPod *v1.Pod) error {
435
442
436
443
// If the pod is in the unschedulable queue, updating it may make it schedulable.
437
444
if usPodInfo := p .unschedulableQ .get (newPod ); usPodInfo != nil {
438
- p .nominatedPods . update (oldPod , newPod )
445
+ p .PodNominator . UpdateNominatedPod (oldPod , newPod )
439
446
if isPodUpdated (oldPod , newPod ) {
440
447
p .unschedulableQ .delete (usPodInfo .Pod )
441
448
err := p .activeQ .Add (updatePod (usPodInfo , newPod ))
@@ -451,7 +458,7 @@ func (p *PriorityQueue) Update(oldPod, newPod *v1.Pod) error {
451
458
// If pod is not in any of the queues, we put it in the active queue.
452
459
err := p .activeQ .Add (p .newQueuedPodInfo (newPod ))
453
460
if err == nil {
454
- p .nominatedPods . add (newPod , "" )
461
+ p .PodNominator . AddNominatedPod (newPod , "" )
455
462
p .cond .Broadcast ()
456
463
}
457
464
return err
@@ -462,7 +469,7 @@ func (p *PriorityQueue) Update(oldPod, newPod *v1.Pod) error {
462
469
func (p * PriorityQueue ) Delete (pod * v1.Pod ) error {
463
470
p .lock .Lock ()
464
471
defer p .lock .Unlock ()
465
- p .nominatedPods . delete (pod )
472
+ p .PodNominator . DeleteNominatedPodIfExists (pod )
466
473
err := p .activeQ .Delete (newQueuedPodInfoNoTimestamp (pod ))
467
474
if err != nil { // The item was probably not found in the activeQ.
468
475
p .podBackoffQ .Delete (newQueuedPodInfoNoTimestamp (pod ))
@@ -553,9 +560,8 @@ func (p *PriorityQueue) getUnschedulablePodsWithMatchingAffinityTerm(pod *v1.Pod
553
560
// but they are waiting for other pods to be removed from the node before they
554
561
// can be actually scheduled.
555
562
func (p * PriorityQueue ) NominatedPodsForNode (nodeName string ) []* v1.Pod {
556
- p .lock .RLock ()
557
- defer p .lock .RUnlock ()
558
- return p .nominatedPods .podsForNode (nodeName )
563
+ // TODO: make podsForNode() public?
564
+ return p .PodNominator .(* nominatedPodMap ).podsForNode (nodeName )
559
565
}
560
566
561
567
// PendingPods returns all the pending pods in the queue. This function is
@@ -585,21 +591,21 @@ func (p *PriorityQueue) Close() {
585
591
p .cond .Broadcast ()
586
592
}
587
593
588
- // DeleteNominatedPodIfExists deletes pod nominatedPods.
589
- func (p * PriorityQueue ) DeleteNominatedPodIfExists (pod * v1.Pod ) {
590
- p . lock .Lock ()
591
- p . nominatedPods .delete (pod )
592
- p . lock .Unlock ()
594
+ // DeleteNominatedPodIfExists deletes < pod> from nominatedPods.
595
+ func (npm * nominatedPodMap ) DeleteNominatedPodIfExists (pod * v1.Pod ) {
596
+ npm .Lock ()
597
+ npm .delete (pod )
598
+ npm .Unlock ()
593
599
}
594
600
595
- // UpdateNominatedPodForNode adds a pod to the nominated pods of the given node.
601
+ // AddNominatedPod adds a pod to the nominated pods of the given node.
596
602
// This is called during the preemption process after a node is nominated to run
597
603
// the pod. We update the structure before sending a request to update the pod
598
604
// object to avoid races with the following scheduling cycles.
599
- func (p * PriorityQueue ) UpdateNominatedPodForNode (pod * v1.Pod , nodeName string ) {
600
- p . lock .Lock ()
601
- p . nominatedPods .add (pod , nodeName )
602
- p . lock .Unlock ()
605
+ func (npm * nominatedPodMap ) AddNominatedPod (pod * v1.Pod , nodeName string ) {
606
+ npm .Lock ()
607
+ npm .add (pod , nodeName )
608
+ npm .Unlock ()
603
609
}
604
610
605
611
func (p * PriorityQueue ) podsCompareBackoffCompleted (podInfo1 , podInfo2 interface {}) bool {
@@ -721,6 +727,8 @@ type nominatedPodMap struct {
721
727
// nominatedPodToNode is map keyed by a Pod UID to the node name where it is
722
728
// nominated.
723
729
nominatedPodToNode map [ktypes.UID ]string
730
+
731
+ sync.RWMutex
724
732
}
725
733
726
734
func (npm * nominatedPodMap ) add (p * v1.Pod , nodeName string ) {
@@ -762,7 +770,10 @@ func (npm *nominatedPodMap) delete(p *v1.Pod) {
762
770
delete (npm .nominatedPodToNode , p .UID )
763
771
}
764
772
765
- func (npm * nominatedPodMap ) update (oldPod , newPod * v1.Pod ) {
773
+ // UpdateNominatedPod updates the <oldPod> with <newPod>.
774
+ func (npm * nominatedPodMap ) UpdateNominatedPod (oldPod , newPod * v1.Pod ) {
775
+ npm .Lock ()
776
+ defer npm .Unlock ()
766
777
// In some cases, an Update event with no "NominatedNode" present is received right
767
778
// after a node("NominatedNode") is reserved for this pod in memory.
768
779
// In this case, we need to keep reserving the NominatedNode when updating the pod pointer.
@@ -784,13 +795,16 @@ func (npm *nominatedPodMap) update(oldPod, newPod *v1.Pod) {
784
795
}
785
796
786
797
func (npm * nominatedPodMap ) podsForNode (nodeName string ) []* v1.Pod {
798
+ npm .RLock ()
799
+ defer npm .RUnlock ()
787
800
if list , ok := npm .nominatedPods [nodeName ]; ok {
788
801
return list
789
802
}
790
803
return nil
791
804
}
792
805
793
- func newNominatedPodMap () * nominatedPodMap {
806
+ // NewPodNominator creates a nominatedPodMap as a backing of framework.PodNominator.
807
+ func NewPodNominator () framework.PodNominator {
794
808
return & nominatedPodMap {
795
809
nominatedPods : make (map [string ][]* v1.Pod ),
796
810
nominatedPodToNode : make (map [ktypes.UID ]string ),
0 commit comments