Skip to content

Commit addb065

Browse files
committed
MS: improve replica defaulting for autoscaler
1 parent 0eb51f0 commit addb065

File tree

11 files changed

+368
-39
lines changed

11 files changed

+368
-39
lines changed

api/v1beta1/machineset_types.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,22 @@ type MachineSetSpec struct {
4141

4242
// Replicas is the number of desired replicas.
4343
// This is a pointer to distinguish between explicit zero and unspecified.
44-
// Defaults to 1.
44+
//
45+
// Defaults to:
46+
// * if the Kubernetes autoscaler min size and max size annotations are set:
47+
// - if it's a new MachineSet, use min size
48+
// - if the replicas field of the old MachineSet is < min size, use min size
49+
// - if the replicas field of the old MachineSet is > max size, use max size
50+
// - if the replicas field of the old MachineSet is in the (min size, max size) range, keep the value from the oldMS
51+
// * otherwise use 1
52+
// Note: Defaulting will be run whenever the replicas field is not set:
53+
// * A new MachineSet is created with replicas not set.
54+
// * On an existing MachineSet the replicas field was first set and is now unset.
55+
// Those cases are especially relevant for the following Kubernetes autoscaler use cases:
56+
// * A new MachineSet is created and replicas should be managed by the autoscaler
57+
// * An existing MachineSet which initially wasn't controlled by the autoscaler
58+
// should be later controlled by the autoscaler
4559
// +optional
46-
// +kubebuilder:default=1
4760
Replicas *int32 `json:"replicas,omitempty"`
4861

4962
// MinReadySeconds is the minimum number of seconds for which a Node for a newly created machine should be ready before considering the replica available.

api/v1beta1/zz_generated.openapi.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/cluster.x-k8s.io_machinesets.yaml

Lines changed: 16 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/book/src/tasks/automated-machine-management/autoscaling.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,17 @@ from the [Autoscaler project documentation](https://github.com/kubernetes/autosc
1212

1313
<aside class="note warning">
1414

15-
<h1>Defaulting of the MachineDeployment replicas field</h1>
15+
<h1>Defaulting of the MachineDeployment, MachineSet replicas field</h1>
1616

17-
Please note that the MachineDeployment replicas field has special defaulting logic to provide a smooth integration with the autoscaler.
18-
The replica field is defaulted based on the autoscaler min and max size annotations.The goal is to pick a default value which is inside
17+
Please note that the MachineDeployment and MachineSet replicas field has special defaulting logic to provide a smooth integration with the autoscaler.
18+
The replica field is defaulted based on the autoscaler min and max size annotations.The goal is to pick a default value which is inside
1919
the (min size, max size) range so the autoscaler can take control of the replicase field.
2020

2121
The defaulting logic is as follows:
2222
* if the autoscaler min size and max size annotations are set:
23-
* if it's a new MachineDeployment, use min size
24-
* if the replicas field of the old MachineDeployment is < min size, use min size
25-
* if the replicas field of the old MachineDeployment is > max size, use max size
26-
* if the replicas field of the old MachineDeployment is in the (min size, max size) range, keep the value from the oldMD
23+
* if it's a new MachineDeployment or MachineSet, use min size
24+
* if the replicas field of the old MachineDeployment or MachineSet is < min size, use min size
25+
* if the replicas field of the old MachineDeployment or MachineSet is > max size, use max size
26+
* if the replicas field of the old MachineDeployment or MachineSet is in the (min size, max size) range, keep the value from the oldMD or oldMS
2727
* otherwise, use 1
2828
</aside>

internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import (
4040
"sigs.k8s.io/controller-runtime/pkg/client/fake"
4141
"sigs.k8s.io/controller-runtime/pkg/log"
4242
"sigs.k8s.io/controller-runtime/pkg/reconcile"
43+
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
4344

4445
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
4546
"sigs.k8s.io/cluster-api/api/v1beta1/index"
@@ -1249,7 +1250,9 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) {
12491250
},
12501251
},
12511252
}
1252-
g.Expect((&webhooks.MachineSet{}).Default(ctx, machineSet)).Should(Succeed())
1253+
1254+
reqCtx := admission.NewContextWithRequest(ctx, admission.Request{})
1255+
g.Expect((&webhooks.MachineSet{}).Default(reqCtx, machineSet)).Should(Succeed())
12531256
g.Expect(env.Create(ctx, machineSet)).To(Succeed())
12541257

12551258
// Ensure machines have been created.

internal/controllers/topology/cluster/reconcile_state_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3241,9 +3241,8 @@ func newFakeMachineDeploymentTopologyState(name string, infrastructureMachineTem
32413241

32423242
scheme := runtime.NewScheme()
32433243
_ = clusterv1.AddToScheme(scheme)
3244-
if err := (&webhooks.MachineDeployment{
3245-
Decoder: admission.NewDecoder(scheme),
3246-
}).Default(admission.NewContextWithRequest(ctx, admission.Request{}), mdState.Object); err != nil {
3244+
if err := (&webhooks.MachineDeployment{}).
3245+
Default(admission.NewContextWithRequest(ctx, admission.Request{}), mdState.Object); err != nil {
32473246
panic(err)
32483247
}
32493248
return mdState

internal/webhooks/machinedeployment.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ import (
4242
)
4343

4444
func (webhook *MachineDeployment) SetupWebhookWithManager(mgr ctrl.Manager) error {
45-
if webhook.Decoder == nil {
46-
webhook.Decoder = admission.NewDecoder(mgr.GetScheme())
45+
if webhook.decoder == nil {
46+
webhook.decoder = admission.NewDecoder(mgr.GetScheme())
4747
}
4848

4949
return ctrl.NewWebhookManagedBy(mgr).
@@ -58,7 +58,7 @@ func (webhook *MachineDeployment) SetupWebhookWithManager(mgr ctrl.Manager) erro
5858

5959
// MachineDeployment implements a validation and defaulting webhook for MachineDeployment.
6060
type MachineDeployment struct {
61-
Decoder *admission.Decoder
61+
decoder *admission.Decoder
6262
}
6363

6464
var _ webhook.CustomDefaulter = &MachineDeployment{}
@@ -83,7 +83,7 @@ func (webhook *MachineDeployment) Default(ctx context.Context, obj runtime.Objec
8383
var oldMD *clusterv1.MachineDeployment
8484
if req.Operation == v1.Update {
8585
oldMD = &clusterv1.MachineDeployment{}
86-
if err := webhook.Decoder.DecodeRaw(req.OldObject, oldMD); err != nil {
86+
if err := webhook.decoder.DecodeRaw(req.OldObject, oldMD); err != nil {
8787
return errors.Wrapf(err, "failed to decode oldObject to MachineDeployment")
8888
}
8989
}

internal/webhooks/machinedeployment_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func TestMachineDeploymentDefault(t *testing.T) {
5252
scheme := runtime.NewScheme()
5353
g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed())
5454
webhook := &MachineDeployment{
55-
Decoder: admission.NewDecoder(scheme),
55+
decoder: admission.NewDecoder(scheme),
5656
}
5757

5858
reqCtx := admission.NewContextWithRequest(ctx, admission.Request{
@@ -403,7 +403,7 @@ func TestMachineDeploymentValidation(t *testing.T) {
403403
scheme := runtime.NewScheme()
404404
g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed())
405405
webhook := MachineDeployment{
406-
Decoder: admission.NewDecoder(scheme),
406+
decoder: admission.NewDecoder(scheme),
407407
}
408408

409409
if tt.expectErr {
@@ -475,7 +475,7 @@ func TestMachineDeploymentVersionValidation(t *testing.T) {
475475
scheme := runtime.NewScheme()
476476
g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed())
477477
webhook := MachineDeployment{
478-
Decoder: admission.NewDecoder(scheme),
478+
decoder: admission.NewDecoder(scheme),
479479
}
480480

481481
if tt.expectErr {
@@ -537,7 +537,7 @@ func TestMachineDeploymentClusterNameImmutable(t *testing.T) {
537537
scheme := runtime.NewScheme()
538538
g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed())
539539
webhook := MachineDeployment{
540-
Decoder: admission.NewDecoder(scheme),
540+
decoder: admission.NewDecoder(scheme),
541541
}
542542

543543
warnings, err := webhook.ValidateUpdate(ctx, oldMD, newMD)
@@ -589,7 +589,7 @@ func TestMachineDeploymentTemplateMetadataValidation(t *testing.T) {
589589
scheme := runtime.NewScheme()
590590
g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed())
591591
webhook := MachineDeployment{
592-
Decoder: admission.NewDecoder(scheme),
592+
decoder: admission.NewDecoder(scheme),
593593
}
594594

595595
if tt.expectErr {

internal/webhooks/machineset.go

Lines changed: 137 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,18 @@ package webhooks
1919
import (
2020
"context"
2121
"fmt"
22+
"strconv"
2223
"strings"
2324

25+
"github.com/pkg/errors"
26+
v1 "k8s.io/api/admission/v1"
2427
apierrors "k8s.io/apimachinery/pkg/api/errors"
2528
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2629
"k8s.io/apimachinery/pkg/labels"
2730
"k8s.io/apimachinery/pkg/runtime"
2831
"k8s.io/apimachinery/pkg/util/sets"
2932
"k8s.io/apimachinery/pkg/util/validation/field"
33+
"k8s.io/utils/pointer"
3034
ctrl "sigs.k8s.io/controller-runtime"
3135
"sigs.k8s.io/controller-runtime/pkg/client"
3236
"sigs.k8s.io/controller-runtime/pkg/webhook"
@@ -39,6 +43,10 @@ import (
3943
)
4044

4145
func (webhook *MachineSet) SetupWebhookWithManager(mgr ctrl.Manager) error {
46+
if webhook.decoder == nil {
47+
webhook.decoder = admission.NewDecoder(mgr.GetScheme())
48+
}
49+
4250
return ctrl.NewWebhookManagedBy(mgr).
4351
For(&clusterv1.MachineSet{}).
4452
WithDefaulter(webhook).
@@ -50,23 +58,49 @@ func (webhook *MachineSet) SetupWebhookWithManager(mgr ctrl.Manager) error {
5058
// +kubebuilder:webhook:verbs=create;update,path=/mutate-cluster-x-k8s-io-v1beta1-machineset,mutating=true,failurePolicy=fail,matchPolicy=Equivalent,groups=cluster.x-k8s.io,resources=machinesets,versions=v1beta1,name=default.machineset.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1
5159

5260
// MachineSet implements a validation and defaulting webhook for MachineSet.
53-
type MachineSet struct{}
61+
type MachineSet struct {
62+
decoder *admission.Decoder
63+
}
5464

5565
var _ webhook.CustomDefaulter = &MachineSet{}
5666
var _ webhook.CustomValidator = &MachineSet{}
5767

5868
// Default sets default MachineSet field values.
59-
func (webhook *MachineSet) Default(_ context.Context, obj runtime.Object) error {
69+
func (webhook *MachineSet) Default(ctx context.Context, obj runtime.Object) error {
6070
m, ok := obj.(*clusterv1.MachineSet)
6171
if !ok {
6272
return apierrors.NewBadRequest(fmt.Sprintf("expected a MachineSet but got a %T", obj))
6373
}
6474

75+
req, err := admission.RequestFromContext(ctx)
76+
if err != nil {
77+
return err
78+
}
79+
80+
dryRun := false
81+
if req.DryRun != nil {
82+
dryRun = *req.DryRun
83+
}
84+
85+
var oldMS *clusterv1.MachineSet
86+
if req.Operation == v1.Update {
87+
oldMS = &clusterv1.MachineSet{}
88+
if err := webhook.decoder.DecodeRaw(req.OldObject, oldMS); err != nil {
89+
return errors.Wrapf(err, "failed to decode oldObject to MachineSet")
90+
}
91+
}
92+
6593
if m.Labels == nil {
6694
m.Labels = make(map[string]string)
6795
}
6896
m.Labels[clusterv1.ClusterNameLabel] = m.Spec.ClusterName
6997

98+
replicas, err := calculateMachineSetReplicas(ctx, oldMS, m, dryRun)
99+
if err != nil {
100+
return err
101+
}
102+
m.Spec.Replicas = pointer.Int32(replicas)
103+
70104
if m.Spec.DeletePolicy == "" {
71105
randomPolicy := string(clusterv1.RandomMachineSetDeletePolicy)
72106
m.Spec.DeletePolicy = randomPolicy
@@ -219,3 +253,104 @@ func validateSkippedMachineSetPreflightChecks(o client.Object) *field.Error {
219253
}
220254
return nil
221255
}
256+
257+
// calculateMachineSetReplicas calculates the default value of the replicas field.
258+
// The value will be calculated based on the following logic:
259+
// * if replicas is already set on newMS, keep the current value
260+
// * if the autoscaler min size and max size annotations are set:
261+
// - if it's a new MachineSet, use min size
262+
// - if the replicas field of the old MachineSet is < min size, use min size
263+
// - if the replicas field of the old MachineSet is > max size, use max size
264+
// - if the replicas field of the old MachineSet is in the (min size, max size) range, keep the value from the oldMS
265+
//
266+
// * otherwise use 1
267+
//
268+
// The goal of this logic is to provide a smoother UX for clusters using the Kubernetes autoscaler.
269+
// Note: Autoscaler only takes over control of the replicas field if the replicas value is in the (min size, max size) range.
270+
//
271+
// We are supporting the following use cases:
272+
// * A new MS is created and replicas should be managed by the autoscaler
273+
// - Either via the default annotation or via the min size and max size annotations the replicas field
274+
// is defaulted to a value which is within the (min size, max size) range so the autoscaler can take control.
275+
//
276+
// * An existing MS which initially wasn't controlled by the autoscaler should be later controlled by the autoscaler
277+
// - To adopt an existing MS users can use the default, min size and max size annotations to enable the autoscaler
278+
// and to ensure the replicas field is within the (min size, max size) range. Without the annotations handing over
279+
// control to the autoscaler by unsetting the replicas field would lead to the field being set to 1. This is very
280+
// disruptive for existing Machines and if 1 is outside the (min size, max size) range the autoscaler won't take
281+
// control.
282+
//
283+
// Notes:
284+
// - While the min size and max size annotations of the autoscaler provide the best UX, other autoscalers can use the
285+
// DefaultReplicasAnnotation if they have similar use cases.
286+
func calculateMachineSetReplicas(ctx context.Context, oldMS *clusterv1.MachineSet, newMS *clusterv1.MachineSet, dryRun bool) (int32, error) {
287+
// If replicas is already set => Keep the current value.
288+
if newMS.Spec.Replicas != nil {
289+
return *newMS.Spec.Replicas, nil
290+
}
291+
292+
log := ctrl.LoggerFrom(ctx)
293+
294+
// If both autoscaler annotations are set, use them to calculate the default value.
295+
minSizeString, hasMinSizeAnnotation := newMS.Annotations[clusterv1.AutoscalerMinSizeAnnotation]
296+
maxSizeString, hasMaxSizeAnnotation := newMS.Annotations[clusterv1.AutoscalerMaxSizeAnnotation]
297+
if hasMinSizeAnnotation && hasMaxSizeAnnotation {
298+
minSize, err := strconv.ParseInt(minSizeString, 10, 32)
299+
if err != nil {
300+
return 0, errors.Wrapf(err, "failed to caculate MachineSet replicas value: could not parse the value of the %q annotation", clusterv1.AutoscalerMinSizeAnnotation)
301+
}
302+
maxSize, err := strconv.ParseInt(maxSizeString, 10, 32)
303+
if err != nil {
304+
return 0, errors.Wrapf(err, "failed to caculate MachineSet replicas value: could not parse the value of the %q annotation", clusterv1.AutoscalerMaxSizeAnnotation)
305+
}
306+
307+
// If it's a new MachineSet => Use the min size.
308+
// Note: This will result in a scale up to get into the range where autoscaler takes over.
309+
if oldMS == nil {
310+
if !dryRun {
311+
log.V(2).Info(fmt.Sprintf("Replica field has been defaulted to %d based on the %s annotation (MS is a new MS)", minSize, clusterv1.AutoscalerMinSizeAnnotation))
312+
}
313+
return int32(minSize), nil
314+
}
315+
316+
// Otherwise we are handing over the control for the replicas field for an existing MachineSet
317+
// to the autoscaler.
318+
319+
switch {
320+
// If the old MachineSet doesn't have replicas set => Use the min size.
321+
// Note: As defaulting always sets the replica field, this case should not be possible
322+
// We only have this handling to be 100% safe against panics.
323+
case oldMS.Spec.Replicas == nil:
324+
if !dryRun {
325+
log.V(2).Info(fmt.Sprintf("Replica field has been defaulted to %d based on the %s annotation (old MS didn't have replicas set)", minSize, clusterv1.AutoscalerMinSizeAnnotation))
326+
}
327+
return int32(minSize), nil
328+
// If the old MachineSet replicas are lower than min size => Use the min size.
329+
// Note: This will result in a scale up to get into the range where autoscaler takes over.
330+
case *oldMS.Spec.Replicas < int32(minSize):
331+
if !dryRun {
332+
log.V(2).Info(fmt.Sprintf("Replica field has been defaulted to %d based on the %s annotation (old MS had replicas below min size)", minSize, clusterv1.AutoscalerMinSizeAnnotation))
333+
}
334+
return int32(minSize), nil
335+
// If the old MachineSet replicas are higher than max size => Use the max size.
336+
// Note: This will result in a scale down to get into the range where autoscaler takes over.
337+
case *oldMS.Spec.Replicas > int32(maxSize):
338+
if !dryRun {
339+
log.V(2).Info(fmt.Sprintf("Replica field has been defaulted to %d based on the %s annotation (old MS had replicas above max size)", maxSize, clusterv1.AutoscalerMaxSizeAnnotation))
340+
}
341+
return int32(maxSize), nil
342+
// If the old MachineSet replicas are between min and max size => Keep the current value.
343+
default:
344+
if !dryRun {
345+
log.V(2).Info(fmt.Sprintf("Replica field has been defaulted to %d based on replicas of the old MachineSet (old MS had replicas within min size / max size range)", *oldMS.Spec.Replicas))
346+
}
347+
return *oldMS.Spec.Replicas, nil
348+
}
349+
}
350+
351+
// If neither the default nor the autoscaler annotations are set => Default to 1.
352+
if !dryRun {
353+
log.V(2).Info("Replica field has been defaulted to 1")
354+
}
355+
return 1, nil
356+
}

0 commit comments

Comments
 (0)