@@ -420,13 +420,6 @@ func (cache *schedulerCache) addPod(pod *v1.Pod) {
420
420
421
421
// Assumes that lock is already acquired.
422
422
func (cache * schedulerCache ) updatePod (oldPod , newPod * v1.Pod ) error {
423
- if _ , ok := cache .nodes [newPod .Spec .NodeName ]; ! ok {
424
- // The node might have been deleted already.
425
- // This is not a problem in the case where a pod update arrives before the
426
- // node creation, because we will always have a create pod event before
427
- // that, which will create the placeholder node item.
428
- return nil
429
- }
430
423
if err := cache .removePod (oldPod ); err != nil {
431
424
return err
432
425
}
@@ -435,18 +428,23 @@ func (cache *schedulerCache) updatePod(oldPod, newPod *v1.Pod) error {
435
428
}
436
429
437
430
// Assumes that lock is already acquired.
438
- // Removes a pod from the cached node info. When a node is removed, some pod
439
- // deletion events might arrive later. This is not a problem, as the pods in
440
- // the node are assumed to be removed already .
431
+ // removePod removes a pod from the cached node info. If the pod's node is not in
432
+ // cache.nodes, it logs an error and returns nil. Once the node info has no pods left
433
+ // and its Node() is nil, it is cleaned up via removeNodeInfoFromList; otherwise it is moved to the head.
441
434
func (cache * schedulerCache ) removePod (pod * v1.Pod ) error {
442
435
n , ok := cache .nodes [pod .Spec .NodeName ]
443
436
if ! ok {
437
+ klog .Errorf ("node %v not found when trying to remove pod %v" , pod .Spec .NodeName , pod .Name )
444
438
return nil // No cached entry for the pod's node: treated as a no-op, not an error.
445
439
}
446
440
if err := n .info .RemovePod (pod ); err != nil {
447
441
return err
448
442
}
449
- cache .moveNodeInfoToHead (pod .Spec .NodeName )
443
+ if len (n .info .Pods ) == 0 && n .info .Node () == nil { // Last pod gone and no Node set on this info.
444
+ cache .removeNodeInfoFromList (pod .Spec .NodeName )
445
+ } else {
446
+ cache .moveNodeInfoToHead (pod .Spec .NodeName )
447
+ }
450
448
return nil
451
449
}
452
450
@@ -616,21 +614,30 @@ func (cache *schedulerCache) UpdateNode(oldNode, newNode *v1.Node) error {
616
614
return n .info .SetNode (newNode )
617
615
}
618
616
619
- // RemoveNode removes a node from the cache.
620
- // Some nodes might still have pods because their deletion events didn't arrive
621
- // yet. For most intents and purposes, those pods are removed from the cache,
622
- // having its source of truth in the cached nodes .
623
- // However, some information on pods (assumedPods, podStates) persist. These
624
- // caches will be eventually consistent as pod deletion events arrive .
617
+ // RemoveNode removes a node from the cache's tree .
618
+ // The node might still have pods because their deletion events didn't arrive
619
+ // yet. Those pods are considered removed from the cache, since the node tree
620
+ // is the source of truth.
621
+ // However, we keep a ghost node with the list of pods until all pod deletion
622
+ // events have arrived. A ghost node is skipped from snapshots .
625
623
func (cache * schedulerCache ) RemoveNode (node * v1.Node ) error {
626
624
cache .mu .Lock ()
627
625
defer cache .mu .Unlock ()
628
626
629
- _ , ok := cache .nodes [node .Name ]
627
+ n , ok := cache .nodes [node .Name ]
630
628
if ! ok {
631
629
return fmt .Errorf ("node %v is not found" , node .Name )
632
630
}
633
- cache .removeNodeInfoFromList (node .Name )
631
+ n .info .RemoveNode ()
632
+ // We remove NodeInfo for this node only if there aren't any pods on this node.
633
+ // We can't do it unconditionally, because notifications about pods are delivered
634
+ // in a different watch, and thus can potentially be observed later, even though
635
+ // they happened before node removal.
636
+ if len (n .info .Pods ) == 0 {
637
+ cache .removeNodeInfoFromList (node .Name )
638
+ } else {
639
+ cache .moveNodeInfoToHead (node .Name )
640
+ }
634
641
if err := cache .nodeTree .removeNode (node ); err != nil {
635
642
return err
636
643
}
0 commit comments