297 changes: 149 additions & 148 deletions cluster-autoscaler/FAQ.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions cluster-autoscaler/cloudprovider/clusterapi/README.md
Original file line number Diff line number Diff line change
@@ -326,6 +326,8 @@ metadata:
cluster.x-k8s.io/autoscaling-options-scaledownunreadytime: "20m0s"
# overrides --max-node-provision-time global value for that specific MachineDeployment
cluster.x-k8s.io/autoscaling-options-maxnodeprovisiontime: "20m0s"
# overrides --max-node-startup-time global value for that specific MachineDeployment
cluster.x-k8s.io/autoscaling-options-maxnodestartuptime: "20m0s"
```

#### CPU Architecture awareness for single-arch clusters
Original file line number Diff line number Diff line change
@@ -454,6 +454,9 @@ func (ng *nodegroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*c
if opt, ok := getDurationOption(options, ng.Id(), config.DefaultMaxNodeProvisionTimeKey); ok {
defaults.MaxNodeProvisionTime = opt
}
if opt, ok := getDurationOption(options, ng.Id(), config.DefaultMaxNodeStartupTimeKey); ok {
defaults.MaxNodeStartupTime = opt
}

return &defaults, nil
}
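For readers unfamiliar with how the per-MachineDeployment annotations from the README reach `GetOptions`, the sketch below shows the general idea in isolation. It is a minimal approximation, not the provider's code: `optionsPrefix` and `getDurationAnnotation` are illustrative stand-ins for the real annotation prefix and the `getDurationOption` helper, whose exact signature is not shown in this diff.

```go
package main

import (
	"fmt"
	"time"
)

// optionsPrefix mirrors the annotation prefix shown in the README snippet above.
const optionsPrefix = "cluster.x-k8s.io/autoscaling-options-"

// getDurationAnnotation is an illustrative stand-in for the provider's
// getDurationOption helper: it looks up a per-node-group option on the
// scalable resource's annotations and parses it as a Go duration.
func getDurationAnnotation(annotations map[string]string, key string) (time.Duration, bool) {
	raw, found := annotations[optionsPrefix+key]
	if !found {
		return 0, false
	}
	d, err := time.ParseDuration(raw)
	if err != nil {
		return 0, false
	}
	return d, true
}

func main() {
	// Annotations as they would appear on a MachineDeployment.
	annotations := map[string]string{
		optionsPrefix + "maxnodestartuptime": "20m0s",
	}
	if d, ok := getDurationAnnotation(annotations, "maxnodestartuptime"); ok {
		fmt.Println("per-node-group MaxNodeStartupTime:", d) // 20m0s
	}
}
```

Parsing with `time.ParseDuration` is what makes duration strings like `"20m0s"` in the README example usable as overrides.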
Original file line number Diff line number Diff line change
@@ -1703,6 +1703,7 @@ func TestNodeGroupGetOptions(t *testing.T) {
ScaleDownUnneededTime: time.Second,
ScaleDownUnreadyTime: time.Minute,
MaxNodeProvisionTime: 15 * time.Minute,
MaxNodeStartupTime: 35 * time.Minute,
}

cases := []struct {
@@ -1723,13 +1724,15 @@ func TestNodeGroupGetOptions(t *testing.T) {
config.DefaultScaleDownUnneededTimeKey: "1h",
config.DefaultScaleDownUnreadyTimeKey: "30m",
config.DefaultMaxNodeProvisionTimeKey: "60m",
config.DefaultMaxNodeStartupTimeKey: "35m",
},
expected: &config.NodeGroupAutoscalingOptions{
ScaleDownGpuUtilizationThreshold: 0.6,
ScaleDownUtilizationThreshold: 0.7,
ScaleDownUnneededTime: time.Hour,
ScaleDownUnreadyTime: 30 * time.Minute,
MaxNodeProvisionTime: 60 * time.Minute,
MaxNodeStartupTime: 35 * time.Minute,
},
},
{
@@ -1744,6 +1747,7 @@ func TestNodeGroupGetOptions(t *testing.T) {
ScaleDownUnneededTime: time.Minute,
ScaleDownUnreadyTime: defaultOptions.ScaleDownUnreadyTime,
MaxNodeProvisionTime: 15 * time.Minute,
MaxNodeStartupTime: 35 * time.Minute,
},
},
{
11 changes: 9 additions & 2 deletions cluster-autoscaler/clusterstate/clusterstate.go
Original file line number Diff line number Diff line change
@@ -624,14 +624,21 @@ type Readiness struct {
func (csr *ClusterStateRegistry) updateReadinessStats(currentTime time.Time) {
perNodeGroup := make(map[string]Readiness)
total := Readiness{Time: currentTime}

update := func(current Readiness, node *apiv1.Node, nr kube_util.NodeReadiness) Readiness {
maxNodeStartupTime := MaxNodeStartupTime
nodeGroup, errNg := csr.cloudProvider.NodeGroupForNode(node)
if errNg == nil && nodeGroup != nil {
if startupTime, err := csr.nodeGroupConfigProcessor.GetMaxNodeStartupTime(nodeGroup); err == nil {
maxNodeStartupTime = startupTime
}
}
klog.V(1).Infof("Node %s: using maxNodeStartupTime = %v", node.Name, maxNodeStartupTime)
current.Registered = append(current.Registered, node.Name)
if _, isDeleted := csr.deletedNodes[node.Name]; isDeleted {
current.Deleted = append(current.Deleted, node.Name)
} else if nr.Ready {
current.Ready = append(current.Ready, node.Name)
} else if node.CreationTimestamp.Time.Add(MaxNodeStartupTime).After(currentTime) {
} else if node.CreationTimestamp.Time.Add(maxNodeStartupTime).After(currentTime) {
current.NotStarted = append(current.NotStarted, node.Name)
} else {
current.Unready = append(current.Unready, node.Name)
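To make the readiness change easier to follow, here is a small, runnable sketch of just the classification decision, under the assumption that the per-node-group lookup has already produced a `maxStartup` value. It is a simplification of `updateReadinessStats`, not the actual implementation.

```go
package main

import (
	"fmt"
	"time"
)

// classification mirrors the readiness buckets used in updateReadinessStats.
type classification string

const (
	deleted    classification = "Deleted"
	ready      classification = "Ready"
	notStarted classification = "NotStarted"
	unready    classification = "Unready"
)

// classifyNode is a simplified sketch of the decision above: an unready node
// still counts as NotStarted while it is younger than the (possibly
// per-node-group) max startup time, and becomes Unready afterwards.
func classifyNode(created time.Time, isReady, isDeleted bool, maxStartup time.Duration, now time.Time) classification {
	switch {
	case isDeleted:
		return deleted
	case isReady:
		return ready
	case created.Add(maxStartup).After(now):
		return notStarted
	default:
		return unready
	}
}

func main() {
	now := time.Now()
	created := now.Add(-30 * time.Minute)
	// With the 15m default a 30-minute-old unready node is Unready; with a
	// 35m per-group override it is still NotStarted.
	fmt.Println(classifyNode(created, false, false, 15*time.Minute, now)) // Unready
	fmt.Println(classifyNode(created, false, false, 35*time.Minute, now)) // NotStarted
}
```

This is exactly the behavior that `TestUnreadyAfterCreationWithIncreasedStartupTime` in the test file below asserts.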
39 changes: 35 additions & 4 deletions cluster-autoscaler/clusterstate/clusterstate_test.go
Original file line number Diff line number Diff line change
@@ -423,7 +423,7 @@ func TestTooManyUnready(t *testing.T) {
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 35 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
assert.NoError(t, err)
assert.False(t, clusterstate.IsClusterHealthy())
@@ -462,6 +462,37 @@ func TestUnreadyLongAfterCreation(t *testing.T) {
assert.Empty(t, upcomingRegistered["ng1"])
}

func TestUnreadyAfterCreationWithIncreasedStartupTime(t *testing.T) {
now := time.Now()

ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
SetNodeReadyState(ng2_1, false, now.Add(-time.Minute))
ng2_1.CreationTimestamp = metav1.Time{Time: now.Add(-30 * time.Minute)}

provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 1)
provider.AddNodeGroup("ng2", 1, 10, 1)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng2", ng2_1)

assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "some-map")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 35 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 0, len(clusterstate.GetClusterReadiness().Unready))
assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().NotStarted))
upcoming, upcomingRegistered := clusterstate.GetUpcomingNodes()
assert.Equal(t, 0, upcoming["ng1"])
assert.Empty(t, upcomingRegistered["ng1"])
}

func TestNotStarted(t *testing.T) {
now := time.Now()

@@ -484,7 +515,7 @@ func TestNotStarted(t *testing.T) {
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 35 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().NotStarted))
@@ -546,7 +577,7 @@ func TestRegisterScaleDown(t *testing.T) {
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 35 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
now := time.Now()
clusterstate.RegisterScaleDown(provider.GetNodeGroup("ng1"), "ng1-1", now.Add(time.Minute), now)
assert.Equal(t, 1, len(clusterstate.scaleDownRequests))
@@ -639,7 +670,7 @@ func TestUpcomingNodes(t *testing.T) {
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1, ng3_1, ng4_1, ng5_1, ng5_2}, nil, now)
assert.NoError(t, err)
assert.Empty(t, clusterstate.GetScaleUpFailures())
2 changes: 2 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
@@ -50,6 +50,8 @@ type NodeGroupAutoscalingOptions struct {
ScaleDownUnreadyTime time.Duration
// Maximum time CA waits for node to be provisioned
MaxNodeProvisionTime time.Duration
// Maximum time CA waits for a registered node to become ready
MaxNodeStartupTime time.Duration
// ZeroOrMaxNodeScaling means that a node group should be scaled up to maximum size or down to zero nodes all at once instead of one-by-one.
ZeroOrMaxNodeScaling bool
// AllowNonAtomicScaleUpToMax indicates that partially failing scale-ups of ZeroOrMaxNodeScaling node groups should not be cancelled
3 changes: 2 additions & 1 deletion cluster-autoscaler/config/const.go
Original file line number Diff line number Diff line change
@@ -38,9 +38,10 @@ const (
DefaultScaleDownUnreadyTimeKey = "scaledownunreadytime"
// DefaultMaxNodeProvisionTimeKey identifies MaxNodeProvisionTime autoscaling option
DefaultMaxNodeProvisionTimeKey = "maxnodeprovisiontime"
// DefaultMaxNodeStartupTimeKey identifies MaxNodeStartupTime autoscaling option
DefaultMaxNodeStartupTimeKey = "maxnodestartuptime"
// DefaultIgnoreDaemonSetsUtilizationKey identifies IgnoreDaemonSetsUtilization autoscaling option
DefaultIgnoreDaemonSetsUtilizationKey = "ignoredaemonsetsutilization"

// DefaultScaleDownUnneededTime is the default time duration for which CA waits before deleting an unneeded node
DefaultScaleDownUnneededTime = 10 * time.Minute
// DefaultScaleDownUnreadyTime identifies ScaleDownUnreadyTime autoscaling option
2 changes: 2 additions & 0 deletions cluster-autoscaler/config/flags/flags.go
Original file line number Diff line number Diff line change
@@ -124,6 +124,7 @@ var (
scaleUpFromZero = flag.Bool("scale-up-from-zero", true, "Should CA scale up when there are 0 ready nodes.")
parallelScaleUp = flag.Bool("parallel-scale-up", false, "Whether to allow parallel node groups scale up. Experimental: may not work on some cloud providers, enable at your own risk.")
maxNodeProvisionTime = flag.Duration("max-node-provision-time", 15*time.Minute, "The default maximum time CA waits for node to be provisioned - the value can be overridden per node group")
maxNodeStartupTime = flag.Duration("max-node-startup-time", 15*time.Minute, "The default maximum time CA waits for a registered node to become ready - the value can be overridden per node group")
maxPodEvictionTime = flag.Duration("max-pod-eviction-time", 2*time.Minute, "Maximum time CA tries to evict a pod before giving up")
nodeGroupsFlag = multiStringFlag(
"nodes",
@@ -290,6 +291,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
ScaleDownUnreadyTime: *scaleDownUnreadyTime,
IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
MaxNodeProvisionTime: *maxNodeProvisionTime,
MaxNodeStartupTime: *maxNodeStartupTime,
},
CloudConfig: *cloudConfig,
CloudProviderName: *cloudProviderFlag,
2 changes: 1 addition & 1 deletion cluster-autoscaler/core/static_autoscaler_test.go
Original file line number Diff line number Diff line change
@@ -2257,7 +2257,7 @@ func TestStaticAutoscalerUpcomingScaleDownCandidates(t *testing.T) {

// Create CSR with unhealthy cluster protection effectively disabled, to guarantee we reach the tested logic.
csrConfig := clusterstate.ClusterStateRegistryConfig{OkTotalUnreadyCount: nodeGroupCount * unreadyNodesCount}
csr := clusterstate.NewClusterStateRegistry(provider, csrConfig, ctx.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), processors.AsyncNodeGroupStateChecker)
csr := clusterstate.NewClusterStateRegistry(provider, csrConfig, ctx.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 15 * time.Minute}), processors.AsyncNodeGroupStateChecker)
Review comment (Contributor): merge conflict pro-tip: when you rebase you'll notice that the variable returned from NewScaleTestAutoscalingContext has been changed to autoscalingCtx


// Setting the Actuator is necessary for testing any scale-down logic, it shouldn't have anything to do in this test.
actuator := actuation.NewActuator(&ctx, csr, deletiontracker.NewNodeDeletionTracker(0*time.Second), options.NodeDeleteOptions{}, nil, processorstest.NewTestProcessors(&ctx).NodeGroupConfigProcessor)
Original file line number Diff line number Diff line change
@@ -35,6 +35,8 @@ type NodeGroupConfigProcessor interface {
GetScaleDownGpuUtilizationThreshold(nodeGroup cloudprovider.NodeGroup) (float64, error)
// GetMaxNodeProvisionTime return MaxNodeProvisionTime value that should be used for a given NodeGroup.
GetMaxNodeProvisionTime(nodeGroup cloudprovider.NodeGroup) (time.Duration, error)
// GetMaxNodeStartupTime returns MaxNodeStartupTime value that should be used for a given NodeGroup.
GetMaxNodeStartupTime(nodeGroup cloudprovider.NodeGroup) (time.Duration, error)
// GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
GetIgnoreDaemonSetsUtilization(nodeGroup cloudprovider.NodeGroup) (bool, error)
// CleanUp cleans up processor's internal structures.
@@ -108,6 +110,18 @@ func (p *DelegatingNodeGroupConfigProcessor) GetMaxNodeProvisionTime(nodeGroup c
return ngConfig.MaxNodeProvisionTime, nil
}

// GetMaxNodeStartupTime returns MaxNodeStartupTime value that should be used for a given NodeGroup.
func (p *DelegatingNodeGroupConfigProcessor) GetMaxNodeStartupTime(nodeGroup cloudprovider.NodeGroup) (time.Duration, error) {
ngConfig, err := nodeGroup.GetOptions(p.nodeGroupDefaults)
if err != nil && err != cloudprovider.ErrNotImplemented {
return 15 * time.Minute, err
Review comment (Contributor): I think we want to return p.nodeGroupDefaults.MaxNodeStartupTime here as well

}
if ngConfig == nil || err == cloudprovider.ErrNotImplemented {
return p.nodeGroupDefaults.MaxNodeStartupTime, nil
}
return ngConfig.MaxNodeStartupTime, nil
}

// GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
func (p *DelegatingNodeGroupConfigProcessor) GetIgnoreDaemonSetsUtilization(nodeGroup cloudprovider.NodeGroup) (bool, error) {
ngConfig, err := nodeGroup.GetOptions(p.nodeGroupDefaults)
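The fallback order in `GetMaxNodeStartupTime` (node-group value if present, otherwise the processor-wide default) can be hard to see among the error handling, so here is a self-contained sketch of the same pattern. The types `options`, `nodeGroup`, `errNotImplemented`, and `fixedGroup` are trimmed stand-ins, not the real cluster-autoscaler types, and the error branch follows the reviewer's suggestion of returning the configured default instead of a hard-coded 15 minutes.

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// errNotImplemented stands in for cloudprovider.ErrNotImplemented.
var errNotImplemented = errors.New("not implemented")

// options is a trimmed stand-in for config.NodeGroupAutoscalingOptions.
type options struct {
	MaxNodeStartupTime time.Duration
}

// nodeGroup is a minimal stand-in for the part of cloudprovider.NodeGroup used here.
type nodeGroup interface {
	GetOptions(defaults options) (*options, error)
}

// maxNodeStartupTime prefers the node group's own value and falls back to the
// processor-wide default when the group does not provide one. On error it
// returns the configured default (per the review comment above) rather than a
// hard-coded 15 minutes.
func maxNodeStartupTime(ng nodeGroup, defaults options) (time.Duration, error) {
	ngConfig, err := ng.GetOptions(defaults)
	if err != nil && !errors.Is(err, errNotImplemented) {
		return defaults.MaxNodeStartupTime, err
	}
	if ngConfig == nil || errors.Is(err, errNotImplemented) {
		return defaults.MaxNodeStartupTime, nil
	}
	return ngConfig.MaxNodeStartupTime, nil
}

// fixedGroup always reports a per-group override.
type fixedGroup struct{ opts options }

func (g fixedGroup) GetOptions(_ options) (*options, error) { return &g.opts, nil }

func main() {
	defaults := options{MaxNodeStartupTime: 15 * time.Minute}
	ng := fixedGroup{opts: options{MaxNodeStartupTime: 35 * time.Minute}}
	d, _ := maxNodeStartupTime(ng, defaults)
	fmt.Println(d) // 35m0s
}
```

Swapping `fixedGroup` for a group whose `GetOptions` returns `nil, errNotImplemented` makes the function fall back to the 15-minute default, which is the case the processor tests below label GLOBAL.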
Original file line number Diff line number Diff line change
@@ -47,6 +47,7 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) {
ScaleDownGpuUtilizationThreshold: 0.6,
ScaleDownUtilizationThreshold: 0.5,
MaxNodeProvisionTime: 15 * time.Minute,
MaxNodeStartupTime: 15 * time.Minute,
IgnoreDaemonSetsUtilization: true,
}
ngOpts := &config.NodeGroupAutoscalingOptions{
@@ -55,6 +56,7 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) {
ScaleDownGpuUtilizationThreshold: 0.85,
ScaleDownUtilizationThreshold: 0.75,
MaxNodeProvisionTime: 60 * time.Minute,
MaxNodeStartupTime: 35 * time.Minute,
IgnoreDaemonSetsUtilization: false,
}

@@ -109,6 +111,17 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) {
assert.Equal(t, res, results[w])
}

testMaxNodeStartupTime := func(t *testing.T, p NodeGroupConfigProcessor, ng cloudprovider.NodeGroup, w Want, we error) {
res, err := p.GetMaxNodeStartupTime(ng)
assert.Equal(t, err, we)
results := map[Want]time.Duration{
NIL: 15 * time.Minute,
GLOBAL: 15 * time.Minute,
NG: 35 * time.Minute,
}
assert.Equal(t, res, results[w])
}

// for IgnoreDaemonSetsUtilization
testIgnoreDSUtilization := func(t *testing.T, p NodeGroupConfigProcessor, ng cloudprovider.NodeGroup, w Want, we error) {
res, err := p.GetIgnoreDaemonSetsUtilization(ng)
@@ -127,13 +140,15 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) {
"ScaleDownUtilizationThreshold": testUtilizationThreshold,
"ScaleDownGpuUtilizationThreshold": testGpuThreshold,
"MaxNodeProvisionTime": testMaxNodeProvisionTime,
"MaxNodeStartupTime": testMaxNodeStartupTime,
"IgnoreDaemonSetsUtilization": testIgnoreDSUtilization,
"MultipleOptions": func(t *testing.T, p NodeGroupConfigProcessor, ng cloudprovider.NodeGroup, w Want, we error) {
testUnneededTime(t, p, ng, w, we)
testUnreadyTime(t, p, ng, w, we)
testUtilizationThreshold(t, p, ng, w, we)
testGpuThreshold(t, p, ng, w, we)
testMaxNodeProvisionTime(t, p, ng, w, we)
testMaxNodeStartupTime(t, p, ng, w, we)
testIgnoreDSUtilization(t, p, ng, w, we)
},
"RepeatingTheSameCallGivesConsistentResults": func(t *testing.T, p NodeGroupConfigProcessor, ng cloudprovider.NodeGroup, w Want, we error) {