@@ -801,6 +801,10 @@ func (a *StaticAutoscaler) removeOldUnregisteredNodes(allUnregisteredNodes []clu
801801 continue
802802 }
803803
804+ if len (nodesToDelete ) == 0 {
805+ continue
806+ }
807+
804808 if a .ForceDeleteLongUnregisteredNodes {
805809 err = nodeGroup .ForceDeleteNodes (nodesToDelete )
806810 if err == cloudprovider .ErrNotImplemented {
@@ -880,12 +884,14 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() {
880884 if nodeGroup == nil {
881885 err = fmt .Errorf ("node group %s not found" , nodeGroupId )
882886 } else if nodesToDelete , err = overrideNodesToDeleteForZeroOrMax (a .NodeGroupDefaults , nodeGroup , nodesToDelete ); err == nil {
883- err = nodeGroup .DeleteNodes (nodesToDelete )
887+ if len (nodesToDelete ) > 0 {
888+ err = nodeGroup .DeleteNodes (nodesToDelete )
889+ }
884890 }
885891
886892 if err != nil {
887893 klog .Warningf ("Error while trying to delete nodes from %v: %v" , nodeGroupId , err )
888- } else {
894+ } else if len ( nodesToDelete ) > 0 {
889895 deletedAny = true
890896 a .clusterStateRegistry .InvalidateNodeInstancesCacheEntry (nodeGroup )
891897 }
@@ -898,21 +904,29 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() {
898904}
899905
900906// overrideNodesToDeleteForZeroOrMax returns a list of nodes to delete, taking into account that
901- // node deletion for a "ZeroOrMaxNodeScaling" node group is atomic and should delete all nodes.
907+ // node deletion for a "ZeroOrMaxNodeScaling" should either keep or remove all the nodes.
902908// For a non-"ZeroOrMaxNodeScaling" node group it returns the unchanged list of nodes to delete.
903909func overrideNodesToDeleteForZeroOrMax (defaults config.NodeGroupAutoscalingOptions , nodeGroup cloudprovider.NodeGroup , nodesToDelete []* apiv1.Node ) ([]* apiv1.Node , error ) {
904910 opts , err := nodeGroup .GetOptions (defaults )
905911 if err != nil && err != cloudprovider .ErrNotImplemented {
906912 return []* apiv1.Node {}, fmt .Errorf ("Failed to get node group options for %s: %s" , nodeGroup .Id (), err )
907913 }
908914 // If a scale-up of "ZeroOrMaxNodeScaling" node group failed, the cleanup
909- // should stick to the all-or-nothing principle. Deleting all nodes.
915+ // node deletion for a "ZeroOrMaxNodeScaling" node group is atomic and should delete all nodes or none .
910916 if opts != nil && opts .ZeroOrMaxNodeScaling {
911917 instances , err := nodeGroup .Nodes ()
912918 if err != nil {
913919 return []* apiv1.Node {}, fmt .Errorf ("Failed to fill in nodes to delete from group %s based on ZeroOrMaxNodeScaling option: %s" , nodeGroup .Id (), err )
914920 }
915- return instancesToFakeNodes (instances ), nil
921+
922+ // Remove all nodes in case when either:
923+ // 1. All nodes are failing
924+ // 2. AllowNonAtomicScaleUpToMax is false which means we want to atomically remove partially failed node groups
925+ if len (instances ) == len (nodesToDelete ) || ! opts .AllowNonAtomicScaleUpToMax {
926+ // Remove all nodes
927+ return instancesToFakeNodes (instances ), nil
928+ }
929+ return []* apiv1.Node {}, nil
916930 }
917931 // No override needed.
918932 return nodesToDelete , nil
0 commit comments