Skip to content

Commit 699c0b8

Browse files
enxebrefrobware
authored andcommitted
Let Nodes() return the list of all machines
The autoscaler expects provider implementations nodeGroups to implement the Nodes() function to return the number of instances belonging to the group regardless of they have become a kubernetes node or not. This information is then used for instance to realise about unregistered nodes https://github.com/kubernetes/autoscaler/blob/bf3a9fb52e3214dff0bea5ef2b97f17ad00a7702/cluster-autoscaler/clusterstate/clusterstate.go#L307-L311
1 parent f83d0dd commit 699c0b8

File tree

6 files changed

+83
-37
lines changed

6 files changed

+83
-37
lines changed

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_controller.go

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -336,25 +336,22 @@ func (c *machineController) machineSetProviderIDs(machineSet *MachineSet) ([]str
336336
return nil, fmt.Errorf("error listing machines: %v", err)
337337
}
338338

339-
var nodes []string
340-
339+
var providerIDs []string
341340
for _, machine := range machines {
341+
if machine.Spec.ProviderID == nil || *machine.Spec.ProviderID == "" {
342+
klog.Warningf("Machine %q has no providerID", machine.Name)
343+
}
344+
342345
if machine.Spec.ProviderID != nil && *machine.Spec.ProviderID != "" {
343-
// Prefer machine<=>node mapping using ProviderID
344-
node, err := c.findNodeByProviderID(*machine.Spec.ProviderID)
345-
if err != nil {
346-
return nil, err
347-
}
348-
if node != nil {
349-
nodes = append(nodes, node.Spec.ProviderID)
350-
continue
351-
}
346+
providerIDs = append(providerIDs, *machine.Spec.ProviderID)
347+
continue
352348
}
353349

354350
if machine.Status.NodeRef == nil {
355351
klog.V(4).Infof("Status.NodeRef of machine %q is currently nil", machine.Name)
356352
continue
357353
}
354+
358355
if machine.Status.NodeRef.Kind != "Node" {
359356
klog.Errorf("Status.NodeRef of machine %q does not reference a node (rather %q)", machine.Name, machine.Status.NodeRef.Kind)
360357
continue
@@ -366,13 +363,12 @@ func (c *machineController) machineSetProviderIDs(machineSet *MachineSet) ([]str
366363
}
367364

368365
if node != nil {
369-
nodes = append(nodes, node.Spec.ProviderID)
366+
providerIDs = append(providerIDs, node.Spec.ProviderID)
370367
}
371368
}
372369

373-
klog.V(4).Infof("nodegroup %s has nodes %v", machineSet.Name, nodes)
374-
375-
return nodes, nil
370+
klog.V(4).Infof("nodegroup %s has nodes %v", machineSet.Name, providerIDs)
371+
return providerIDs, nil
376372
}
377373

378374
func (c *machineController) filterAllMachineSets(f machineSetFilterFunc) error {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machinedeployment.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ func (r machineDeploymentScalableResource) Nodes() ([]string, error) {
6161

6262
if err := r.controller.filterAllMachineSets(func(machineSet *MachineSet) error {
6363
if machineSetIsOwnedByMachineDeployment(machineSet, r.machineDeployment) {
64-
names, err := r.controller.machineSetNodeNames(machineSet)
64+
providerIDs, err := r.controller.machineSetProviderIDs(machineSet)
6565
if err != nil {
6666
return err
6767
}
68-
result = append(result, names...)
68+
result = append(result, providerIDs...)
6969
}
7070
return nil
7171
}); err != nil {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_machineset.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func (r machineSetScalableResource) Namespace() string {
5757
}
5858

5959
func (r machineSetScalableResource) Nodes() ([]string, error) {
60-
return r.controller.machineSetNodeNames(r.machineSet)
60+
return r.controller.machineSetProviderIDs(r.machineSet)
6161
}
6262

6363
func (r machineSetScalableResource) Replicas() int32 {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ func (ng *nodegroup) Debug() string {
179179
}
180180

181181
// Nodes returns a list of all nodes that belong to this node group.
182+
// This includes instances that might have not become a kubernetes node yet.
182183
func (ng *nodegroup) Nodes() ([]cloudprovider.Instance, error) {
183184
nodes, err := ng.scalableResource.Nodes()
184185
if err != nil {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go

Lines changed: 67 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -401,10 +401,12 @@ func TestNodeGroupIncreaseSize(t *testing.T) {
401401

402402
func TestNodeGroupDecreaseTargetSize(t *testing.T) {
403403
type testCase struct {
404-
description string
405-
delta int
406-
initial int32
407-
expected int32
404+
description string
405+
delta int
406+
initial int32
407+
targetSizeIncrement int32
408+
expected int32
409+
expectedError bool
408410
}
409411

410412
test := func(t *testing.T, tc *testCase, testConfig *testConfig) {
@@ -421,17 +423,49 @@ func TestNodeGroupDecreaseTargetSize(t *testing.T) {
421423
}
422424

423425
ng := nodegroups[0]
424-
currReplicas, err := ng.TargetSize()
425-
if err := controller.machineSetInformer.Informer().GetStore().Add(newUnstructuredFromMachineSet(testConfig.machineSet)); err != nil {
426-
}
426+
// DecreaseTargetSize should only decrease the size when the current target size of the nodeGroup
427+
// is bigger than the number existing instances for that group. We force such a scenario with targetSizeIncrement.
428+
switch v := (ng.scalableResource).(type) {
429+
case *machineSetScalableResource:
430+
testConfig.machineSet.Spec.Replicas = int32ptr(*testConfig.machineSet.Spec.Replicas + tc.targetSizeIncrement)
431+
ms, err := ng.machineapiClient.MachineSets(ng.Namespace()).Update(testConfig.machineSet)
432+
if err != nil {
433+
t.Fatalf("unexpected error: %v", err)
434+
}
427435

428-
if err := controller.nodeInformer.GetStore().Delete(testConfig.nodes[0]); err != nil {
429-
if err := controller.machineDeploymentInformer.Informer().GetStore().Add(newUnstructuredFromMachineDeployment(testConfig.machineDeployment)); err != nil {
436+
if err := controller.machineSetInformer.Informer().GetStore().Add(ms); err != nil {
437+
t.Fatalf("failed to add new machine: %v", err)
438+
}
439+
case *machineDeploymentScalableResource:
440+
testConfig.machineDeployment.Spec.Replicas = int32ptr(*testConfig.machineDeployment.Spec.Replicas + tc.targetSizeIncrement)
441+
md, err := ng.machineapiClient.MachineDeployments(ng.Namespace()).Update(testConfig.machineDeployment)
442+
if err != nil {
443+
t.Fatalf("unexpected error: %v", err)
444+
}
445+
if err := controller.machineDeploymentInformer.Informer().GetStore().Add(md); err != nil {
446+
t.Fatalf("failed to add new machine: %v", err)
447+
}
448+
default:
449+
t.Errorf("unexpected type: %T", v)
430450
}
451+
// A nodegroup is immutable; get a fresh copy after adding targetSizeIncrement.
452+
nodegroups, err = controller.nodeGroups()
453+
if err != nil {
454+
t.Fatalf("unexpected error: %v", err)
455+
}
456+
ng = nodegroups[0]
431457

432-
if err := ng.DecreaseTargetSize(tc.delta); err != nil {
458+
currReplicas, err := ng.TargetSize()
459+
if err != nil {
433460
t.Fatalf("unexpected error: %v", err)
434461
}
462+
if currReplicas != int(tc.initial)+int(tc.targetSizeIncrement) {
463+
t.Errorf("initially expected %v, got %v", tc.initial, currReplicas)
464+
}
465+
466+
if err := ng.DecreaseTargetSize(tc.delta); (err != nil) != tc.expectedError {
467+
t.Fatalf("expected error: %v, got: %v", tc.expectedError, err)
468+
}
435469

436470
switch v := (ng.scalableResource).(type) {
437471
case *machineSetScalableResource:
@@ -464,20 +498,35 @@ func TestNodeGroupDecreaseTargetSize(t *testing.T) {
464498

465499
t.Run("MachineSet", func(t *testing.T) {
466500
tc := testCase{
467-
description: "decrease by 1",
468-
initial: 3,
469-
expected: 2,
470-
delta: -1,
501+
description: "Same number of existing instances and node group target size should error",
502+
initial: 3,
503+
targetSizeIncrement: 0,
504+
expected: 3,
505+
delta: -1,
506+
expectedError: true,
507+
}
508+
test(t, &tc, createMachineSetTestConfig(testNamespace, int(tc.initial), annotations))
509+
})
510+
511+
t.Run("MachineSet", func(t *testing.T) {
512+
tc := testCase{
513+
description: "A node group with targe size 4 but only 3 existing instances should decrease by 1",
514+
initial: 3,
515+
targetSizeIncrement: 1,
516+
expected: 3,
517+
delta: -1,
471518
}
472519
test(t, &tc, createMachineSetTestConfig(testNamespace, int(tc.initial), annotations))
473520
})
474521

475522
t.Run("MachineDeployment", func(t *testing.T) {
476523
tc := testCase{
477-
description: "decrease by 1",
478-
initial: 3,
479-
expected: 2,
480-
delta: -1,
524+
description: "Same number of existing instances and node group target size should error",
525+
initial: 3,
526+
targetSizeIncrement: 0,
527+
expected: 3,
528+
delta: -1,
529+
expectedError: true,
481530
}
482531
test(t, &tc, createMachineDeploymentTestConfig(testNamespace, int(tc.initial), annotations))
483532
})

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_scalableresource.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ type scalableResource interface {
3434
// Namespace returns the namespace the resource is in
3535
Namespace() string
3636

37-
// Nodes returns a list of all nodes that belong to this
37+
// Nodes returns a list of all machines that already have or should become nodes that belong to this
3838
// resource
3939
Nodes() ([]string, error)
4040

0 commit comments

Comments
 (0)