@@ -22,7 +22,6 @@ import (
22
22
"time"
23
23
24
24
v1 "k8s.io/api/core/v1"
25
- "k8s.io/apimachinery/pkg/labels"
26
25
"k8s.io/apimachinery/pkg/util/sets"
27
26
"k8s.io/apimachinery/pkg/util/wait"
28
27
utilfeature "k8s.io/apiserver/pkg/util/feature"
@@ -315,7 +314,9 @@ func (cache *schedulerCache) removeDeletedNodesFromSnapshot(snapshot *Snapshot)
315
314
}
316
315
}
317
316
318
- func (cache * schedulerCache ) ListPods (selector labels.Selector ) ([]* v1.Pod , error ) {
317
+ // PodCount returns the number of pods in the cache (including those from deleted nodes).
318
+ // DO NOT use outside of tests.
319
+ func (cache * schedulerCache ) PodCount () (int , error ) {
319
320
cache .mu .RLock ()
320
321
defer cache .mu .RUnlock ()
321
322
// podFilter is expected to return true for most or all of the pods. We
@@ -325,15 +326,11 @@ func (cache *schedulerCache) ListPods(selector labels.Selector) ([]*v1.Pod, erro
325
326
for _ , n := range cache .nodes {
326
327
maxSize += len (n .info .Pods )
327
328
}
328
- pods := make ([] * v1. Pod , 0 , maxSize )
329
+ count := 0
329
330
for _ , n := range cache .nodes {
330
- for _ , p := range n .info .Pods {
331
- if selector .Matches (labels .Set (p .Pod .Labels )) {
332
- pods = append (pods , p .Pod )
333
- }
334
- }
331
+ count += len (n .info .Pods )
335
332
}
336
- return pods , nil
333
+ return count , nil
337
334
}
338
335
339
336
func (cache * schedulerCache ) AssumePod (pod * v1.Pod ) error {
@@ -423,13 +420,6 @@ func (cache *schedulerCache) addPod(pod *v1.Pod) {
423
420
424
421
// Assumes that lock is already acquired.
425
422
func (cache * schedulerCache ) updatePod (oldPod , newPod * v1.Pod ) error {
426
- if _ , ok := cache .nodes [newPod .Spec .NodeName ]; ! ok {
427
- // The node might have been deleted already.
428
- // This is not a problem in the case where a pod update arrives before the
429
- // node creation, because we will always have a create pod event before
430
- // that, which will create the placeholder node item.
431
- return nil
432
- }
433
423
if err := cache .removePod (oldPod ); err != nil {
434
424
return err
435
425
}
@@ -438,18 +428,23 @@ func (cache *schedulerCache) updatePod(oldPod, newPod *v1.Pod) error {
438
428
}
439
429
440
430
// Assumes that lock is already acquired.
441
- // Removes a pod from the cached node info. When a node is removed, some pod
442
- // deletion events might arrive later. This is not a problem, as the pods in
443
- // the node are assumed to be removed already .
431
+ // Removes a pod from the cached node info. If the node information was already
432
+ // removed and there are no more pods left in the node, cleans up the node from
433
+ // the cache.
444
434
func (cache * schedulerCache ) removePod (pod * v1.Pod ) error {
445
435
n , ok := cache .nodes [pod .Spec .NodeName ]
446
436
if ! ok {
437
+ klog .Errorf ("node %v not found when trying to remove pod %v" , pod .Spec .NodeName , pod .Name )
447
438
return nil
448
439
}
449
440
if err := n .info .RemovePod (pod ); err != nil {
450
441
return err
451
442
}
452
- cache .moveNodeInfoToHead (pod .Spec .NodeName )
443
+ if len (n .info .Pods ) == 0 && n .info .Node () == nil {
444
+ cache .removeNodeInfoFromList (pod .Spec .NodeName )
445
+ } else {
446
+ cache .moveNodeInfoToHead (pod .Spec .NodeName )
447
+ }
453
448
return nil
454
449
}
455
450
@@ -619,21 +614,30 @@ func (cache *schedulerCache) UpdateNode(oldNode, newNode *v1.Node) error {
619
614
return n .info .SetNode (newNode )
620
615
}
621
616
622
- // RemoveNode removes a node from the cache.
623
- // Some nodes might still have pods because their deletion events didn't arrive
624
- // yet. For most intents and purposes, those pods are removed from the cache,
625
- // having it's source of truth in the cached nodes .
626
- // However, some information on pods (assumedPods, podStates) persist. These
627
- // caches will be eventually consistent as pod deletion events arrive .
617
+ // RemoveNode removes a node from the cache's tree.
618
+ // The node might still have pods because their deletion events didn't arrive
619
+ // yet. Those pods are considered removed from the cache, since the node tree
620
+ // is the source of truth.
621
+ // However, we keep a ghost node with the list of pods until all pod deletion
622
+ // events have arrived. A ghost node is skipped from snapshots.
628
623
func (cache * schedulerCache ) RemoveNode (node * v1.Node ) error {
629
624
cache .mu .Lock ()
630
625
defer cache .mu .Unlock ()
631
626
632
- _ , ok := cache .nodes [node .Name ]
627
+ n , ok := cache .nodes [node .Name ]
633
628
if ! ok {
634
629
return fmt .Errorf ("node %v is not found" , node .Name )
635
630
}
636
- cache .removeNodeInfoFromList (node .Name )
631
+ n .info .RemoveNode ()
632
+ // We remove NodeInfo for this node only if there aren't any pods on this node.
633
+ // We can't do it unconditionally, because notifications about pods are delivered
634
+ // in a different watch, and thus can potentially be observed later, even though
635
+ // they happened before node removal.
636
+ if len (n .info .Pods ) == 0 {
637
+ cache .removeNodeInfoFromList (node .Name )
638
+ } else {
639
+ cache .moveNodeInfoToHead (node .Name )
640
+ }
637
641
if err := cache .nodeTree .removeNode (node ); err != nil {
638
642
return err
639
643
}
@@ -736,19 +740,6 @@ func (cache *schedulerCache) expirePod(key string, ps *podState) error {
736
740
return nil
737
741
}
738
742
739
- // GetNodeInfo returns cached data for the node name.
740
- func (cache * schedulerCache ) GetNodeInfo (nodeName string ) (* v1.Node , error ) {
741
- cache .mu .RLock ()
742
- defer cache .mu .RUnlock ()
743
-
744
- n , ok := cache .nodes [nodeName ]
745
- if ! ok {
746
- return nil , fmt .Errorf ("node %q not found in cache" , nodeName )
747
- }
748
-
749
- return n .info .Node (), nil
750
- }
751
-
752
743
// updateMetrics updates cache size metric values for pods, assumed pods, and nodes
753
744
func (cache * schedulerCache ) updateMetrics () {
754
745
metrics .CacheSize .WithLabelValues ("assumed_pods" ).Set (float64 (len (cache .assumedPods )))
0 commit comments