Skip to content

Commit f21b047

Browse files
authored
Merge pull request kubernetes#2916 from aleksandra-malinowska/delta-snapshot-13
More clean-ups in scale-down
2 parents 8b2173e + 2621709 commit f21b047

File tree

1 file changed

+49
-37
lines changed

1 file changed

+49
-37
lines changed

cluster-autoscaler/core/scale_down.go

Lines changed: 49 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,40 @@ func (sd *ScaleDown) CleanUpUnneededNodes() {
394394
sd.unneededNodes = make(map[string]time.Time)
395395
}
396396

397+
// checkNodeUtilization runs the per-node scale-down eligibility checks and
// returns the reason the node was rejected (simulator.NoReason if none of the
// checks rejected it) together with the node's utilization info.
//
// The returned utilization info is nil when the node is rejected before
// utilization is calculated, i.e. when it is present in sd.unremovableNodes,
// is currently being deleted, or carries the no-scale-down annotation.
// Otherwise it points to the value returned by simulator.CalculateUtilization,
// both for simulator.NotUnderutilized and for simulator.NoReason.
func (sd *ScaleDown) checkNodeUtilization(timestamp time.Time, node *apiv1.Node, nodeInfo *schedulernodeinfo.NodeInfo) (simulator.UnremovableReason, *simulator.UtilizationInfo) {
398+
// Skip nodes that were recently checked.
399+
if _, found := sd.unremovableNodes[node.Name]; found {
400+
return simulator.RecentlyUnremovable, nil
401+
}
402+
403+
// Skip nodes marked to be deleted, if they were marked recently.
404+
// Old-time marked nodes are again eligible for deletion - something went wrong with them
405+
// and they have not been deleted.
406+
if isNodeBeingDeleted(node, timestamp) {
407+
klog.V(1).Infof("Skipping %s from delete consideration - the node is currently being deleted", node.Name)
408+
return simulator.CurrentlyBeingDeleted, nil
409+
}
410+
411+
// Skip nodes marked with no scale down annotation
412+
if hasNoScaleDownAnnotation(node) {
413+
klog.V(1).Infof("Skipping %s from delete consideration - the node is marked as no scale down", node.Name)
414+
return simulator.ScaleDownDisabledAnnotation, nil
415+
}
416+
417+
utilInfo, err := simulator.CalculateUtilization(node, nodeInfo, sd.context.IgnoreDaemonSetsUtilization, sd.context.IgnoreMirrorPodsUtilization, sd.context.CloudProvider.GPULabel())
418+
// NOTE(review): a calculation error is only logged here; utilInfo is still
// used below exactly as CalculateUtilization returned it. Confirm that the
// value returned alongside an error cannot make the node look underutilized.
if err != nil {
419+
klog.Warningf("Failed to calculate utilization for %s: %v", node.Name, err)
420+
}
421+
klog.V(4).Infof("Node %s - %s utilization %f", node.Name, utilInfo.ResourceName, utilInfo.Utilization)
422+
423+
if !sd.isNodeBelowUtilizationThreshold(node, utilInfo) {
424+
klog.V(4).Infof("Node %s is not suitable for removal - %s utilization too big (%f)", node.Name, utilInfo.ResourceName, utilInfo.Utilization)
425+
return simulator.NotUnderutilized, &utilInfo
426+
}
427+
428+
return simulator.NoReason, &utilInfo
429+
}
430+
397431
// UpdateUnneededNodes calculates which nodes are not needed, i.e. all pods can be scheduled somewhere else,
398432
// and updates unneededNodes map accordingly. It also computes information where pods can be rescheduled and
399433
// node utilization level. The computations are made only for the nodes managed by CA.
@@ -416,7 +450,7 @@ func (sd *ScaleDown) UpdateUnneededNodes(
416450
return errors.ToAutoscalerError(errors.InternalError, err)
417451
}
418452

419-
sd.updateUnremovableNodes()
453+
sd.updateUnremovableNodes(timestamp)
420454

421455
skipped := 0
422456
utilizationMap := make(map[string]simulator.UtilizationInfo)
@@ -432,44 +466,20 @@ func (sd *ScaleDown) UpdateUnneededNodes(
432466
continue
433467
}
434468

435-
// Skip nodes that were recently checked.
436-
if unremovableTimestamp, found := sd.unremovableNodes[node.Name]; found {
437-
if unremovableTimestamp.After(timestamp) {
438-
sd.addUnremovableNodeReason(node, simulator.RecentlyUnremovable)
469+
reason, utilInfo := sd.checkNodeUtilization(timestamp, node, nodeInfo)
470+
if utilInfo != nil {
471+
utilizationMap[node.Name] = *utilInfo
472+
}
473+
if reason != simulator.NoReason {
474+
// For logging purposes.
475+
if reason == simulator.RecentlyUnremovable {
439476
skipped++
440-
continue
441477
}
442-
delete(sd.unremovableNodes, node.Name)
443-
}
444478

445-
// Skip nodes marked to be deleted, if they were marked recently.
446-
// Old-time marked nodes are again eligible for deletion - something went wrong with them
447-
// and they have not been deleted.
448-
if isNodeBeingDeleted(node, timestamp) {
449-
klog.V(1).Infof("Skipping %s from delete consideration - the node is currently being deleted", node.Name)
450-
sd.addUnremovableNodeReason(node, simulator.CurrentlyBeingDeleted)
451-
continue
452-
}
453-
454-
// Skip nodes marked with no scale down annotation
455-
if hasNoScaleDownAnnotation(node) {
456-
klog.V(1).Infof("Skipping %s from delete consideration - the node is marked as no scale down", node.Name)
457-
sd.addUnremovableNodeReason(node, simulator.ScaleDownDisabledAnnotation)
479+
sd.addUnremovableNodeReason(node, reason)
458480
continue
459481
}
460482

461-
utilInfo, err := simulator.CalculateUtilization(node, nodeInfo, sd.context.IgnoreDaemonSetsUtilization, sd.context.IgnoreMirrorPodsUtilization, sd.context.CloudProvider.GPULabel())
462-
if err != nil {
463-
klog.Warningf("Failed to calculate utilization for %s: %v", node.Name, err)
464-
}
465-
klog.V(4).Infof("Node %s - %s utilization %f", node.Name, utilInfo.ResourceName, utilInfo.Utilization)
466-
utilizationMap[node.Name] = utilInfo
467-
468-
if !sd.isNodeBelowUtilizationThreshold(node, utilInfo) {
469-
klog.V(4).Infof("Node %s is not suitable for removal - %s utilization too big (%f)", node.Name, utilInfo.ResourceName, utilInfo.Utilization)
470-
sd.addUnremovableNodeReason(node, simulator.NotUnderutilized)
471-
continue
472-
}
473483
currentlyUnneededNodeNames = append(currentlyUnneededNodeNames, node.Name)
474484
}
475485

@@ -617,19 +627,21 @@ func (sd *ScaleDown) isNodeBelowUtilizationThreshold(node *apiv1.Node, utilInfo
617627
// updateUnremovableNodes updates unremovableNodes map according to current
618628
// state of the cluster. Removes from the map nodes that are no longer in the
619629
// nodes list, as well as nodes whose entry has expired (TTL not after timestamp).
620-
func (sd *ScaleDown) updateUnremovableNodes() {
630+
func (sd *ScaleDown) updateUnremovableNodes(timestamp time.Time) {
621631
if len(sd.unremovableNodes) <= 0 {
622632
return
623633
}
624634
newUnremovableNodes := make(map[string]time.Time, len(sd.unremovableNodes))
625-
for oldUnremovable, since := range sd.unremovableNodes {
635+
for oldUnremovable, ttl := range sd.unremovableNodes {
626636
if _, err := sd.context.ClusterSnapshot.NodeInfos().Get(oldUnremovable); err != nil {
627637
// Not logging on error level as most likely cause is that node is no longer in the cluster.
628638
klog.Infof("Can't retrieve node %s from snapshot, removing from unremovable map, err: %v", oldUnremovable, err)
629639
continue
630640
}
631-
// Keep nodes that are still in the cluster.
632-
newUnremovableNodes[oldUnremovable] = since
641+
if ttl.After(timestamp) {
642+
// Keep nodes that are still in the cluster and haven't expired yet.
643+
newUnremovableNodes[oldUnremovable] = ttl
644+
}
633645
}
634646
sd.unremovableNodes = newUnremovableNodes
635647
}

0 commit comments

Comments
 (0)