Skip to content

Commit e81b680

Browse files
author
Yuvaraj Kakaraparthi
committed
in-place propagation from MS to Machines
1 parent 193f13a commit e81b680

File tree

5 files changed

+828
-67
lines changed

5 files changed

+828
-67
lines changed

internal/controllers/machineset/machineset_controller.go

Lines changed: 131 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2929
"k8s.io/apimachinery/pkg/labels"
3030
kerrors "k8s.io/apimachinery/pkg/util/errors"
31+
"k8s.io/apiserver/pkg/storage/names"
3132
"k8s.io/client-go/tools/record"
3233
"k8s.io/klog/v2"
3334
ctrl "sigs.k8s.io/controller-runtime"
@@ -42,6 +43,7 @@ import (
4243
"sigs.k8s.io/cluster-api/controllers/remote"
4344
"sigs.k8s.io/cluster-api/internal/controllers/machine"
4445
capilabels "sigs.k8s.io/cluster-api/internal/labels"
46+
"sigs.k8s.io/cluster-api/internal/util/ssa"
4547
"sigs.k8s.io/cluster-api/util"
4648
"sigs.k8s.io/cluster-api/util/annotations"
4749
"sigs.k8s.io/cluster-api/util/collections"
@@ -64,6 +66,8 @@ var (
6466
stateConfirmationInterval = 100 * time.Millisecond
6567
)
6668

69+
const machineSetManagerName = "capi-machineset"
70+
6771
// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
6872
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch
6973
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
@@ -290,39 +294,6 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
290294
filteredMachines = append(filteredMachines, machine)
291295
}
292296

293-
// If not already present, add a label specifying the MachineSet name to Machines.
294-
// Ensure all required labels exist on the controlled Machines.
295-
// This logic is needed to add the `cluster.x-k8s.io/set-name` label to Machines
296-
// which were created before the `cluster.x-k8s.io/set-name` label was added to
297-
// all Machines created by a MachineSet or if a user manually removed the label.
298-
for _, machine := range filteredMachines {
299-
mdNameOnMachineSet, mdNameSetOnMachineSet := machineSet.Labels[clusterv1.MachineDeploymentNameLabel]
300-
mdNameOnMachine := machine.Labels[clusterv1.MachineDeploymentNameLabel]
301-
302-
// Note: MustEqualValue is used here as the value of this label will be a hash if the MachineSet name is longer than 63 characters.
303-
if msNameLabelValue, ok := machine.Labels[clusterv1.MachineSetNameLabel]; ok && capilabels.MustEqualValue(machineSet.Name, msNameLabelValue) &&
304-
(!mdNameSetOnMachineSet || mdNameOnMachineSet == mdNameOnMachine) {
305-
// Continue if the MachineSet name label is already set correctly and
306-
// either the MachineDeployment name label is not set on the MachineSet or
307-
// the MachineDeployment name label is set correctly on the Machine.
308-
continue
309-
}
310-
311-
helper, err := patch.NewHelper(machine, r.Client)
312-
if err != nil {
313-
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to Machine %q", clusterv1.MachineSetNameLabel, machine.Name)
314-
}
315-
// Note: MustFormatValue is used here as the value of this label will be a hash if the MachineSet name is longer than 63 characters.
316-
machine.Labels[clusterv1.MachineSetNameLabel] = capilabels.MustFormatValue(machineSet.Name)
317-
// Propagate the MachineDeploymentNameLabel from MachineSet to Machine if it is set on the MachineSet.
318-
if mdNameSetOnMachineSet {
319-
machine.Labels[clusterv1.MachineDeploymentNameLabel] = mdNameOnMachineSet
320-
}
321-
if err := helper.Patch(ctx, machine); err != nil {
322-
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to Machine %q", clusterv1.MachineSetNameLabel, machine.Name)
323-
}
324-
}
325-
326297
// Remediate failed Machines by deleting them.
327298
var errs []error
328299
for _, machine := range filteredMachines {
@@ -352,6 +323,10 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
352323
return ctrl.Result{}, errors.Wrap(err, "failed to remediate machines")
353324
}
354325

326+
if err := r.syncMachines(ctx, machineSet, filteredMachines); err != nil {
327+
return ctrl.Result{}, errors.Wrap(err, "failed to update Machines")
328+
}
329+
355330
syncErr := r.syncReplicas(ctx, machineSet, filteredMachines)
356331

357332
// Always updates status as machines come up or die.
@@ -389,6 +364,43 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
389364
return ctrl.Result{}, nil
390365
}
391366

367+
// syncMachines updates Machines to propagate in-place mutable fields from the MachineSet.
368+
// Note: It also cleans up managed fields of all Machines so that Machines that were created/patched before (< v1.4.0)
369+
// the controller adopted Server-Side-Apply (SSA) can also work with SSA. Otherwise fields would be co-owned by
370+
// our "old" "manager" and "capi-machineset" and then we would not be able to e.g. drop labels and annotations.
371+
// TODO: update the labels and annotations to the corresponding infra machines and the boostrap configs of the filtered machines.
372+
func (r *Reconciler) syncMachines(ctx context.Context, machineSet *clusterv1.MachineSet, machines []*clusterv1.Machine) error {
373+
log := ctrl.LoggerFrom(ctx)
374+
for i := range machines {
375+
m := machines[i]
376+
// If the machine is already being deleted, we don't need to update it.
377+
if !m.DeletionTimestamp.IsZero() {
378+
continue
379+
}
380+
381+
// Cleanup managed fields of all Machines.
382+
// We do this so that Machines that were created/patched before the controller adopted Server-Side-Apply (SSA)
383+
// (< v1.4.0) can also work with SSA. Otherwise, fields would be co-owned by our "old" "manager" and
384+
// "capi-machineset" and then we would not be able to e.g. drop labels and annotations.
385+
if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, m, machineSetManagerName, r.Client); err != nil {
386+
return errors.Wrapf(err, "failed to update machine: failed to adjust the managedFields of the Machine %q", m.Name)
387+
}
388+
389+
// Update Machine to propagate in-place mutable fields from the MachineSet.
390+
updatedMachine := r.computeDesiredMachine(machineSet, m)
391+
patchOptions := []client.PatchOption{
392+
client.ForceOwnership,
393+
client.FieldOwner(machineSetManagerName),
394+
}
395+
if err := r.Client.Patch(ctx, updatedMachine, client.Apply, patchOptions...); err != nil {
396+
log.Error(err, "failed to update Machine", "Machine", klog.KObj(updatedMachine))
397+
return errors.Wrapf(err, "failed to update Machine %q", klog.KObj(updatedMachine))
398+
}
399+
machines[i] = updatedMachine
400+
}
401+
return nil
402+
}
403+
392404
// syncReplicas scales Machine resources up or down.
393405
func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine) error {
394406
log := ctrl.LoggerFrom(ctx)
@@ -414,18 +426,18 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
414426
for i := 0; i < diff; i++ {
415427
// Create a new logger so the global logger is not modified.
416428
log := log
417-
machine := r.getNewMachine(ms)
418-
429+
machine := r.computeDesiredMachine(ms, nil)
419430
// Clone and set the infrastructure and bootstrap references.
420431
var (
421432
infraRef, bootstrapRef *corev1.ObjectReference
422433
err error
423434
)
424435

425-
if machine.Spec.Bootstrap.ConfigRef != nil {
436+
// Create the BootstrapConfig if necessary.
437+
if ms.Spec.Template.Spec.Bootstrap.ConfigRef != nil {
426438
bootstrapRef, err = external.CreateFromTemplate(ctx, &external.CreateFromTemplateInput{
427439
Client: r.Client,
428-
TemplateRef: machine.Spec.Bootstrap.ConfigRef,
440+
TemplateRef: ms.Spec.Template.Spec.Bootstrap.ConfigRef,
429441
Namespace: machine.Namespace,
430442
ClusterName: machine.Spec.ClusterName,
431443
Labels: machine.Labels,
@@ -439,15 +451,18 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
439451
})
440452
if err != nil {
441453
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.BootstrapTemplateCloningFailedReason, clusterv1.ConditionSeverityError, err.Error())
442-
return errors.Wrapf(err, "failed to clone bootstrap configuration from %s %s while creating a machine", machine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(machine.Spec.Bootstrap.ConfigRef.Namespace, machine.Spec.Bootstrap.ConfigRef.Name))
454+
return errors.Wrapf(err, "failed to clone bootstrap configuration from %s %s while creating a machine",
455+
ms.Spec.Template.Spec.Bootstrap.ConfigRef.Kind,
456+
klog.KRef(ms.Spec.Template.Spec.Bootstrap.ConfigRef.Namespace, ms.Spec.Template.Spec.Bootstrap.ConfigRef.Name))
443457
}
444458
machine.Spec.Bootstrap.ConfigRef = bootstrapRef
445459
log = log.WithValues(bootstrapRef.Kind, klog.KRef(bootstrapRef.Namespace, bootstrapRef.Name))
446460
}
447461

462+
// Create the InfraMachine.
448463
infraRef, err = external.CreateFromTemplate(ctx, &external.CreateFromTemplateInput{
449464
Client: r.Client,
450-
TemplateRef: &machine.Spec.InfrastructureRef,
465+
TemplateRef: &ms.Spec.Template.Spec.InfrastructureRef,
451466
Namespace: machine.Namespace,
452467
ClusterName: machine.Spec.ClusterName,
453468
Labels: machine.Labels,
@@ -461,12 +476,19 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
461476
})
462477
if err != nil {
463478
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.InfrastructureTemplateCloningFailedReason, clusterv1.ConditionSeverityError, err.Error())
464-
return errors.Wrapf(err, "failed to clone infrastructure machine from %s %s while creating a machine", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Spec.InfrastructureRef.Namespace, machine.Spec.InfrastructureRef.Name))
479+
return errors.Wrapf(err, "failed to clone infrastructure machine from %s %s while creating a machine",
480+
ms.Spec.Template.Spec.InfrastructureRef.Kind,
481+
klog.KRef(ms.Spec.Template.Spec.InfrastructureRef.Namespace, ms.Spec.Template.Spec.InfrastructureRef.Name))
465482
}
466483
log = log.WithValues(infraRef.Kind, klog.KRef(infraRef.Namespace, infraRef.Name))
467484
machine.Spec.InfrastructureRef = *infraRef
468485

469-
if err := r.Client.Create(ctx, machine); err != nil {
486+
// Create the Machine.
487+
patchOptions := []client.PatchOption{
488+
client.ForceOwnership,
489+
client.FieldOwner(machineSetManagerName),
490+
}
491+
if err := r.Client.Patch(ctx, machine, client.Apply, patchOptions...); err != nil {
470492
log.Error(err, "Error while creating a machine")
471493
r.recorder.Eventf(ms, corev1.EventTypeWarning, "FailedCreate", "Failed to create machine: %v", err)
472494
errs = append(errs, err)
@@ -529,45 +551,87 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
529551
return nil
530552
}
531553

532-
// getNewMachine creates a new Machine object. The name of the newly created resource is going
533-
// to be created by the API server, we set the generateName field.
534-
func (r *Reconciler) getNewMachine(machineSet *clusterv1.MachineSet) *clusterv1.Machine {
535-
gv := clusterv1.GroupVersion
536-
machine := &clusterv1.Machine{
554+
// computeDesiredMachine computes the desired Machine.
555+
// This Machine will be used during reconciliation to:
556+
// * create a Machine
557+
// * update an existing Machine
558+
// Because we are using Server-Side-Apply we always have to calculate the full object.
559+
// There are small differences in how we calculate the Machine depending on if it
560+
// is a create or update. Example: for a new Machine we have to calculate a new name,
561+
// while for an existing Machine we have to use the name of the existing Machine.
562+
func (r *Reconciler) computeDesiredMachine(machineSet *clusterv1.MachineSet, existingMachine *clusterv1.Machine) *clusterv1.Machine {
563+
desiredMachine := &clusterv1.Machine{
564+
TypeMeta: metav1.TypeMeta{
565+
APIVersion: clusterv1.GroupVersion.String(),
566+
Kind: "Machine",
567+
},
537568
ObjectMeta: metav1.ObjectMeta{
538-
GenerateName: fmt.Sprintf("%s-", machineSet.Name),
539-
// Note: by setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine.
569+
Name: names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-", machineSet.Name)),
570+
Namespace: machineSet.Namespace,
571+
// Note: By setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine.
540572
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(machineSet, machineSetKind)},
541-
Namespace: machineSet.Namespace,
542-
Labels: make(map[string]string),
543-
Annotations: machineSet.Spec.Template.Annotations,
544-
},
545-
TypeMeta: metav1.TypeMeta{
546-
Kind: gv.WithKind("Machine").Kind,
547-
APIVersion: gv.String(),
573+
Labels: map[string]string{},
574+
Annotations: map[string]string{},
548575
},
549-
Spec: machineSet.Spec.Template.Spec,
550-
}
551-
machine.Spec.ClusterName = machineSet.Spec.ClusterName
552-
553-
// Set the labels from machineSet.Spec.Template.Labels as labels for the new Machine.
576+
Spec: *machineSet.Spec.Template.Spec.DeepCopy(),
577+
}
578+
// Set ClusterName.
579+
desiredMachine.Spec.ClusterName = machineSet.Spec.ClusterName
580+
581+
// Clean up the refs to the incorrect objects.
582+
// The InfrastructureRef and the Bootstrap.ConfigRef in Machine should point to the InfrastructureMachine
583+
// and the BootstrapConfig objects. In the MachineSet these values point to InfrastructureMachineTemplate
584+
// BootstrapConfigTemplate. Drop the values that were copied over from MachineSet during DeepCopy
585+
// to make sure to not point to incorrect refs.
586+
// Note: During Machine creation, these refs will be updated with the correct values after the corresponding
587+
// objects are created.
588+
desiredMachine.Spec.InfrastructureRef = corev1.ObjectReference{}
589+
desiredMachine.Spec.Bootstrap.ConfigRef = nil
590+
591+
// If we are updating an existing Machine reuse the name, uid, infrastructureRef and bootstrap.configRef
592+
// from the existingMachine.
593+
// Note: we use UID to force SSA to update the existing Machine and to not accidentally create a new Machine.
594+
// infrastructureRef and bootstrap.configRef remain the same for an existing Machine.
595+
if existingMachine != nil {
596+
desiredMachine.SetName(existingMachine.Name)
597+
desiredMachine.SetUID(existingMachine.UID)
598+
desiredMachine.Spec.Bootstrap.ConfigRef = existingMachine.Spec.Bootstrap.ConfigRef
599+
desiredMachine.Spec.InfrastructureRef = existingMachine.Spec.InfrastructureRef
600+
}
601+
602+
// Set the in-place mutable fields.
603+
// When we create a new Machine we will just create the Machine with those fields.
604+
// When we update an existing Machine will we update the fields on the existing Machine (in-place mutate).
605+
606+
// Set Labels
554607
// Note: We can't just set `machineSet.Spec.Template.Labels` directly and thus "share" the labels
555608
// map between Machine and machineSet.Spec.Template.Labels. This would mean that adding the
556609
// MachineSetNameLabel and MachineDeploymentNameLabel later on the Machine would also add the labels
557610
// to machineSet.Spec.Template.Labels and thus modify the labels of the MachineSet.
558611
for k, v := range machineSet.Spec.Template.Labels {
559-
machine.Labels[k] = v
612+
desiredMachine.Labels[k] = v
560613
}
561-
562-
// Enforce that the MachineSetNameLabel label is set
563-
// Note: the MachineSetNameLabel is added by the default webhook to MachineSet.spec.template.labels if a spec.selector is empty.
564-
machine.Labels[clusterv1.MachineSetNameLabel] = capilabels.MustFormatValue(machineSet.Name)
614+
// Always set the MachineSetNameLabel.
615+
// Note: If a client tries to create a MachineSet without a selector, the MachineSet webhook
616+
// will add this label automatically. But we want this label to always be present even if the MachineSet
617+
// has a selector which doesn't include it. Therefore, we have to set it here explicitly.
618+
desiredMachine.Labels[clusterv1.MachineSetNameLabel] = capilabels.MustFormatValue(machineSet.Name)
565619
// Propagate the MachineDeploymentNameLabel from MachineSet to Machine if it exists.
566620
if mdName, ok := machineSet.Labels[clusterv1.MachineDeploymentNameLabel]; ok {
567-
machine.Labels[clusterv1.MachineDeploymentNameLabel] = mdName
621+
desiredMachine.Labels[clusterv1.MachineDeploymentNameLabel] = mdName
622+
}
623+
624+
// Set Annotations
625+
for k, v := range machineSet.Spec.Template.Annotations {
626+
desiredMachine.Annotations[k] = v
568627
}
569628

570-
return machine
629+
// Set all other in-place mutable fields.
630+
desiredMachine.Spec.NodeDrainTimeout = machineSet.Spec.Template.Spec.NodeDrainTimeout
631+
desiredMachine.Spec.NodeDeletionTimeout = machineSet.Spec.Template.Spec.NodeDeletionTimeout
632+
desiredMachine.Spec.NodeVolumeDetachTimeout = machineSet.Spec.Template.Spec.NodeVolumeDetachTimeout
633+
634+
return desiredMachine
571635
}
572636

573637
// shouldExcludeMachine returns true if the machine should be filtered out, false otherwise.

0 commit comments

Comments
 (0)