Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ func (ali *aliCloudProvider) Cleanup() error {
return nil
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (ali *aliCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// AliRef contains a reference to ECS instance or .
type AliRef struct {
ID string
Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@ func (aws *awsCloudProvider) Refresh() error {
return aws.awsManager.Refresh()
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (aws *awsCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// AwsRef contains a reference to some entity in AWS world.
type AwsRef struct {
Name string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ func (azure *AzureCloudProvider) Refresh() error {
return azure.azureManager.Refresh()
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (azure *AzureCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// azureRef contains a reference to some entity in Azure world.
type azureRef struct {
Name string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,11 @@ func (baiducloud *baiducloudCloudProvider) Refresh() error {
return nil
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (baiducloud *baiducloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BaiducloudRef contains a reference to some entity in baiducloud world.
type BaiducloudRef struct {
Name string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ func (d *bizflycloudCloudProvider) Refresh() error {
return d.manager.Refresh()
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (d *bizflycloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildBizflyCloud builds the Bizflycloud cloud provider.
func BuildBizflyCloud(
opts config.AutoscalingOptions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,11 @@ func (b *brightboxCloudProvider) Cleanup() error {
return nil
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (b *brightboxCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildBrightbox builds the Brightbox provider
func BuildBrightbox(
opts config.AutoscalingOptions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ func (ccp *cherryCloudProvider) Cleanup() error {
return nil
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (ccp *cherryCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildCherry is called by the autoscaler to build a Cherry Servers cloud provider.
//
// The cherryManager is created here, and the node groups are created
Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/cloudprovider/civo/civo_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,11 @@ func (d *civoCloudProvider) Refresh() error {
return d.manager.Refresh()
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (d *civoCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildCivo builds the Civo cloud provider.
func BuildCivo(
opts config.AutoscalingOptions,
Expand Down
8 changes: 8 additions & 0 deletions cluster-autoscaler/cloudprovider/cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,14 @@ type CloudProvider interface {
// Refresh is called before every main loop and can be used to dynamically update cloud provider state.
// In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh().
Refresh() error

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This function
// will be called during prefiltering of nodes for scaledown to allow cloud providers the opportunity
// to reject a node for scale down. This may be used in cases where nodes are undergoing upgrades or other
// cloud-specific behavior where the cluster autoscaler should not begin cordoning, draining, and tainting
// the node.
// Returns true if the node can be safely scaled down or false otherwise.
IsNodeCandidateForScaleDown(*apiv1.Node) (bool, error)
}

// ErrNotImplemented is returned if a method is not implemented.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ func (provider *cloudStackCloudProvider) Pricing() (cloudprovider.PricingModel,
return nil, cloudprovider.ErrNotImplemented
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (provider *cloudStackCloudProvider) IsNodeCandidateForScaleDown(node *v1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
// created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
func (provider *cloudStackCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string, taints []v1.Taint, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,27 @@ func (c *machineController) listMachinesForScalableResource(r *unstructured.Unst
}
}

func (c *machineController) listMachineSetsForMachineDeployment(r *unstructured.Unstructured) ([]*unstructured.Unstructured, error) {
selector := labels.SelectorFromSet(map[string]string{
machineDeploymentNameLabel: r.GetName(),
})
objs, err := c.machineSetInformer.Lister().ByNamespace(r.GetNamespace()).List(selector)
if err != nil {
return nil, fmt.Errorf("unable to list MachineSets for MachineDeployment %s: %w", r.GetName(), err)
}

results := make([]*unstructured.Unstructured, 0, len(objs))
for _, x := range objs {
u, ok := x.(*unstructured.Unstructured)
if !ok {
return nil, fmt.Errorf("expected unstructured resource from lister, not %T", x)
}
results = append(results, u.DeepCopy())
}

return results, nil
}

func (c *machineController) listScalableResources() ([]*unstructured.Unstructured, error) {
scalableResources, err := c.listResources(c.machineSetInformer.Lister())
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,68 @@ func (ng *nodegroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*c
return &defaults, nil
}

func (ng *nodegroup) IsMachineDeploymentAndRollingOut() (bool, error) {
if ng.scalableResource.Kind() != machineDeploymentKind {
// Not a MachineDeployment.
return false, nil
}

machineSets, err := ng.machineController.listMachineSetsForMachineDeployment(ng.scalableResource.unstructured)
if err != nil {
return false, err
}

if len(machineSets) == 0 {
// No MachineSets => MD is not rolling out.
return false, nil
}

// Find the latest revision, the MachineSet with the latest revision is the MachineSet that
// matches the MachineDeployment spec.
var latestMSRevisionInt int64
for _, ms := range machineSets {
msRevision, ok := ms.GetAnnotations()[machineDeploymentRevisionAnnotation]
if !ok {
continue
}

msRevisionInt, err := strconv.ParseInt(msRevision, 10, 64)
if err != nil {
return false, errors.Wrapf(err, "failed to parse current revision on MachineSet %s", klog.KObj(ms))
}
latestMSRevisionInt = max(latestMSRevisionInt, msRevisionInt)
}
maxMSRevision := strconv.FormatInt(latestMSRevisionInt, 10)

for _, ms := range machineSets {
if ms.GetAnnotations()[machineDeploymentRevisionAnnotation] == maxMSRevision {
// Ignore the MachineSet with the latest revision
continue
}

// Check if any of the old MachineSets still have replicas
replicas, found, err := unstructured.NestedInt64(ms.UnstructuredContent(), "spec", "replicas")
if err != nil {
return false, errors.Wrapf(err, "failed to find spec replicas on MachineSet %s", klog.KObj(ms))
}
if found && replicas > 0 {
// Found old MachineSets that still has replicas => MD is still rolling out.
return true, nil
}
replicas, found, err = unstructured.NestedInt64(ms.UnstructuredContent(), "status", "replicas")
if err != nil {
return false, errors.Wrapf(err, "failed to find status replicas on MachineSet %s", klog.KObj(ms))
}
if found && replicas > 0 {
// Found old MachineSets that still has replicas => MD is still rolling out.
return true, nil
}
}

// Didn't find any old MachineSets that still have replicas => MD is not rolling out.
return false, nil
}

func newNodeGroupFromScalableResource(controller *machineController, unstructuredScalableResource *unstructured.Unstructured) (*nodegroup, error) {
// Ensure that the resulting node group would be allowed based on the autodiscovery specs if defined
if !controller.allowedByAutoDiscoverySpecs(unstructuredScalableResource) {
Expand Down
23 changes: 21 additions & 2 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"path"
"reflect"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/client-go/discovery"
Expand All @@ -34,7 +35,7 @@ import (

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
caserrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
)

Expand Down Expand Up @@ -92,7 +93,7 @@ func (p *provider) HasInstance(node *corev1.Node) (bool, error) {
return false, fmt.Errorf("machine not found for node %s: %v", node.Name, err)
}

func (*provider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
func (*provider) Pricing() (cloudprovider.PricingModel, caserrors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented
}

Expand Down Expand Up @@ -140,6 +141,24 @@ func (p *provider) GetNodeGpuConfig(node *corev1.Node) *cloudprovider.GpuConfig
return gpu.GetNodeGPUFromCloudProvider(p, node)
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (p *provider) IsNodeCandidateForScaleDown(node *corev1.Node) (bool, error) {
ng, err := p.controller.nodeGroupForNode(node)
if err != nil {
return false, errors.Wrapf(err, "failed to determine node group for node %s", klog.KObj(node))
}
if ng == nil {
klog.V(5).Infof("node %s is not part of a node group", klog.KObj(node))
return false, nil
}
rollingout, err := ng.IsMachineDeploymentAndRollingOut()
if err != nil {
return false, errors.Wrapf(err, "failed to determine rolling out status for MachineDeployment %s", ng.scalableResource.ID())
}
// A node is a good candidate for scale down if it is not currently part of a MachineDeployment that is rolling out.
return !rollingout, nil
}

func newProvider(
name string,
rl *cloudprovider.ResourceLimiter,
Expand Down
20 changes: 11 additions & 9 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,17 @@ import (
)

const (
cpuKey = "capacity.cluster-autoscaler.kubernetes.io/cpu"
memoryKey = "capacity.cluster-autoscaler.kubernetes.io/memory"
diskCapacityKey = "capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk"
gpuTypeKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-type"
gpuCountKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-count"
maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints"
labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels"
draDriverKey = "capacity.cluster-autoscaler.kubernetes.io/dra-driver"
cpuKey = "capacity.cluster-autoscaler.kubernetes.io/cpu"
memoryKey = "capacity.cluster-autoscaler.kubernetes.io/memory"
diskCapacityKey = "capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk"
gpuTypeKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-type"
gpuCountKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-count"
maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints"
labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels"
draDriverKey = "capacity.cluster-autoscaler.kubernetes.io/dra-driver"
machineDeploymentRevisionAnnotation = "machinedeployment.clusters.x-k8s.io/revision"
machineDeploymentNameLabel = "cluster.x-k8s.io/deployment-name"
// UnknownArch is used if the Architecture is Unknown
UnknownArch SystemArchitecture = ""
// Amd64 is used if the Architecture is x86_64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,11 @@ func (c *CoreWeaveCloudProvider) Refresh() error {
return c.manager.Refresh()
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (c *CoreWeaveCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildCoreWeave builds the CoreWeave cloud provider with the given options and returns it.
func BuildCoreWeave(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
klog.V(4).Infof("Building CoreWeave cloud provider with options: %+v", opts)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ func (d *digitaloceanCloudProvider) Refresh() error {
return d.manager.Refresh()
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (d *digitaloceanCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildDigitalOcean builds the DigitalOcean cloud provider.
func BuildDigitalOcean(
opts config.AutoscalingOptions,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ func (pcp *equinixMetalCloudProvider) Cleanup() error {
return nil
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (pcp *equinixMetalCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildCloudProvider is called by the autoscaler to build an Equinix Metal cloud provider.
//
// The equinixMetalManager is created here, and the node groups are created
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,11 @@ func (e *exoscaleCloudProvider) Refresh() error {
return e.manager.Refresh()
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (e *exoscaleCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildExoscale builds the Exoscale cloud provider.
func BuildExoscale(_ config.AutoscalingOptions, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
manager, err := newManager(discoveryOpts)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,11 @@ func (e *externalGrpcCloudProvider) Refresh() error {
return nil
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (e *externalGrpcCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildExternalGrpc builds the externalgrpc cloud provider.
func BuildExternalGrpc(
opts config.AutoscalingOptions,
Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ func (gce *GceCloudProvider) Refresh() error {
return gce.gceManager.Refresh()
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (gce *GceCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// GceRef contains s reference to some entity in GCE world.
type GceRef struct {
Project string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,11 @@ func (d *HetznerCloudProvider) Refresh() error {
return nil
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (d *HetznerCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

// BuildHetzner builds the Hetzner cloud provider.
func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
manager, err := newManager()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ func (hcp *huaweicloudCloudProvider) Refresh() error {
return nil
}

// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
func (hcp *huaweicloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

func (hcp *huaweicloudCloudProvider) buildAsgs(specs []string) error {
asgs, err := hcp.cloudServiceManager.ListScalingGroups()
if err != nil {
Expand Down
Loading
Loading