@@ -394,6 +394,40 @@ func (sd *ScaleDown) CleanUpUnneededNodes() {
394394 sd .unneededNodes = make (map [string ]time.Time )
395395}
396396
397+ func (sd * ScaleDown ) checkNodeUtilization (timestamp time.Time , node * apiv1.Node , nodeInfo * schedulernodeinfo.NodeInfo ) (simulator.UnremovableReason , * simulator.UtilizationInfo ) {
398+ // Skip nodes that were recently checked.
399+ if _ , found := sd .unremovableNodes [node .Name ]; found {
400+ return simulator .RecentlyUnremovable , nil
401+ }
402+
403+ // Skip nodes marked to be deleted, if they were marked recently.
404+ // Old-time marked nodes are again eligible for deletion - something went wrong with them
405+ // and they have not been deleted.
406+ if isNodeBeingDeleted (node , timestamp ) {
407+ klog .V (1 ).Infof ("Skipping %s from delete consideration - the node is currently being deleted" , node .Name )
408+ return simulator .CurrentlyBeingDeleted , nil
409+ }
410+
411+ // Skip nodes marked with no scale down annotation
412+ if hasNoScaleDownAnnotation (node ) {
413+ klog .V (1 ).Infof ("Skipping %s from delete consideration - the node is marked as no scale down" , node .Name )
414+ return simulator .ScaleDownDisabledAnnotation , nil
415+ }
416+
417+ utilInfo , err := simulator .CalculateUtilization (node , nodeInfo , sd .context .IgnoreDaemonSetsUtilization , sd .context .IgnoreMirrorPodsUtilization , sd .context .CloudProvider .GPULabel ())
418+ if err != nil {
419+ klog .Warningf ("Failed to calculate utilization for %s: %v" , node .Name , err )
420+ }
421+ klog .V (4 ).Infof ("Node %s - %s utilization %f" , node .Name , utilInfo .ResourceName , utilInfo .Utilization )
422+
423+ if ! sd .isNodeBelowUtilizationThreshold (node , utilInfo ) {
424+ klog .V (4 ).Infof ("Node %s is not suitable for removal - %s utilization too big (%f)" , node .Name , utilInfo .ResourceName , utilInfo .Utilization )
425+ return simulator .NotUnderutilized , & utilInfo
426+ }
427+
428+ return simulator .NoReason , & utilInfo
429+ }
430+
397431// UpdateUnneededNodes calculates which nodes are not needed, i.e. all pods can be scheduled somewhere else,
398432// and updates unneededNodes map accordingly. It also computes information where pods can be rescheduled and
399433// node utilization level. The computations are made only for the nodes managed by CA.
@@ -416,7 +450,7 @@ func (sd *ScaleDown) UpdateUnneededNodes(
416450 return errors .ToAutoscalerError (errors .InternalError , err )
417451 }
418452
419- sd .updateUnremovableNodes ()
453+ sd .updateUnremovableNodes (timestamp )
420454
421455 skipped := 0
422456 utilizationMap := make (map [string ]simulator.UtilizationInfo )
@@ -432,44 +466,20 @@ func (sd *ScaleDown) UpdateUnneededNodes(
432466 continue
433467 }
434468
435- // Skip nodes that were recently checked.
436- if unremovableTimestamp , found := sd .unremovableNodes [node .Name ]; found {
437- if unremovableTimestamp .After (timestamp ) {
438- sd .addUnremovableNodeReason (node , simulator .RecentlyUnremovable )
469+ reason , utilInfo := sd .checkNodeUtilization (timestamp , node , nodeInfo )
470+ if utilInfo != nil {
471+ utilizationMap [node .Name ] = * utilInfo
472+ }
473+ if reason != simulator .NoReason {
474+ // For logging purposes.
475+ if reason == simulator .RecentlyUnremovable {
439476 skipped ++
440- continue
441477 }
442- delete (sd .unremovableNodes , node .Name )
443- }
444478
445- // Skip nodes marked to be deleted, if they were marked recently.
446- // Old-time marked nodes are again eligible for deletion - something went wrong with them
447- // and they have not been deleted.
448- if isNodeBeingDeleted (node , timestamp ) {
449- klog .V (1 ).Infof ("Skipping %s from delete consideration - the node is currently being deleted" , node .Name )
450- sd .addUnremovableNodeReason (node , simulator .CurrentlyBeingDeleted )
451- continue
452- }
453-
454- // Skip nodes marked with no scale down annotation
455- if hasNoScaleDownAnnotation (node ) {
456- klog .V (1 ).Infof ("Skipping %s from delete consideration - the node is marked as no scale down" , node .Name )
457- sd .addUnremovableNodeReason (node , simulator .ScaleDownDisabledAnnotation )
479+ sd .addUnremovableNodeReason (node , reason )
458480 continue
459481 }
460482
461- utilInfo , err := simulator .CalculateUtilization (node , nodeInfo , sd .context .IgnoreDaemonSetsUtilization , sd .context .IgnoreMirrorPodsUtilization , sd .context .CloudProvider .GPULabel ())
462- if err != nil {
463- klog .Warningf ("Failed to calculate utilization for %s: %v" , node .Name , err )
464- }
465- klog .V (4 ).Infof ("Node %s - %s utilization %f" , node .Name , utilInfo .ResourceName , utilInfo .Utilization )
466- utilizationMap [node .Name ] = utilInfo
467-
468- if ! sd .isNodeBelowUtilizationThreshold (node , utilInfo ) {
469- klog .V (4 ).Infof ("Node %s is not suitable for removal - %s utilization too big (%f)" , node .Name , utilInfo .ResourceName , utilInfo .Utilization )
470- sd .addUnremovableNodeReason (node , simulator .NotUnderutilized )
471- continue
472- }
473483 currentlyUnneededNodeNames = append (currentlyUnneededNodeNames , node .Name )
474484 }
475485
@@ -617,19 +627,21 @@ func (sd *ScaleDown) isNodeBelowUtilizationThreshold(node *apiv1.Node, utilInfo
617627// updateUnremovableNodes updates unremovableNodes map according to current
618628// state of the cluster. Removes from the map nodes that are no longer in the
619629// nodes list.
620- func (sd * ScaleDown ) updateUnremovableNodes () {
630+ func (sd * ScaleDown ) updateUnremovableNodes (timestamp time. Time ) {
621631 if len (sd .unremovableNodes ) <= 0 {
622632 return
623633 }
624634 newUnremovableNodes := make (map [string ]time.Time , len (sd .unremovableNodes ))
625- for oldUnremovable , since := range sd .unremovableNodes {
635+ for oldUnremovable , ttl := range sd .unremovableNodes {
626636 if _ , err := sd .context .ClusterSnapshot .NodeInfos ().Get (oldUnremovable ); err != nil {
627637 // Not logging on error level as most likely cause is that node is no longer in the cluster.
628638 klog .Infof ("Can't retrieve node %s from snapshot, removing from unremovable map, err: %v" , oldUnremovable , err )
629639 continue
630640 }
631- // Keep nodes that are still in the cluster.
632- newUnremovableNodes [oldUnremovable ] = since
641+ if ttl .After (timestamp ) {
642+ // Keep nodes that are still in the cluster and haven't expired yet.
643+ newUnremovableNodes [oldUnremovable ] = ttl
644+ }
633645 }
634646 sd .unremovableNodes = newUnremovableNodes
635647}
0 commit comments