Skip to content

Commit 0a17fa2

Browse files
authored
Merge pull request #7591 from sbueringer/pr-adoption
🐛 Fix Machine adoption for KCP/MachineSet-owned Machines
2 parents 3e24dae + 4f536d8 commit 0a17fa2

File tree

11 files changed

+141
-24
lines changed

11 files changed

+141
-24
lines changed

api/v1beta1/machine_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ const (
4242
// MachineDeploymentLabelName is the label set on machines if they're controlled by MachineDeployment.
4343
MachineDeploymentLabelName = "cluster.x-k8s.io/deployment-name"
4444

45+
// MachineControlPlaneNameLabel is the label set on machines if they're controlled by a ControlPlane.
46+
MachineControlPlaneNameLabel = "cluster.x-k8s.io/control-plane-name"
47+
4548
// PreDrainDeleteHookAnnotationPrefix annotation specifies the prefix we
4649
// search each annotation for during the pre-drain.delete lifecycle hook
4750
// to pause reconciliation of deletion. These hooks will prevent removal of

controlplane/kubeadm/internal/cluster_labels.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,6 @@ func ControlPlaneMachineLabelsForCluster(kcp *controlplanev1.KubeadmControlPlane
3434
// Always force these labels over the ones coming from the spec.
3535
labels[clusterv1.ClusterLabelName] = clusterName
3636
labels[clusterv1.MachineControlPlaneLabelName] = ""
37+
labels[clusterv1.MachineControlPlaneNameLabel] = kcp.Name
3738
return labels
3839
}

controlplane/kubeadm/internal/controllers/controller.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,19 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
329329
// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
330330
conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false))
331331

332+
// Ensure all required labels exist on the controlled Machines.
333+
// This logic is needed to add the `cluster.x-k8s.io/control-plane-name` label to Machines
334+
// which were created before the `cluster.x-k8s.io/control-plane-name` label was introduced
335+
// or if a user manually removed the label.
336+
// NOTE: Changes will be applied to the Machines in reconcileControlPlaneConditions.
337+
// NOTE: cluster.x-k8s.io/control-plane is already set at this stage (it is used when reading controlPlane.Machines).
338+
for i := range controlPlane.Machines {
339+
machine := controlPlane.Machines[i]
340+
if value, ok := machine.Labels[clusterv1.MachineControlPlaneNameLabel]; !ok || value != kcp.Name {
341+
machine.Labels[clusterv1.MachineControlPlaneNameLabel] = kcp.Name
342+
}
343+
}
344+
332345
// Updates conditions reporting the status of static pods and the status of the etcd cluster.
333346
// NOTE: Conditions reporting KCP operation progress like e.g. Resized or SpecUpToDate are inlined with the rest of the execution.
334347
if result, err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil || !result.IsZero() {

controlplane/kubeadm/internal/controllers/helpers.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp
275275
Namespace: kcp.Namespace,
276276
Labels: internal.ControlPlaneMachineLabelsForCluster(kcp, cluster.Name),
277277
Annotations: map[string]string{},
278+
// Note: by setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine.
278279
OwnerReferences: []metav1.OwnerReference{
279280
*metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane")),
280281
},

controlplane/kubeadm/internal/controllers/helpers_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,13 +549,18 @@ func TestKubeadmControlPlaneReconciler_generateMachine(t *testing.T) {
549549
for k, v := range kcpMachineTemplateObjectMeta.Labels {
550550
g.Expect(machine.Labels[k]).To(Equal(v))
551551
}
552+
g.Expect(machine.Labels[clusterv1.ClusterLabelName]).To(Equal(cluster.Name))
553+
g.Expect(machine.Labels[clusterv1.MachineControlPlaneLabelName]).To(Equal(""))
554+
g.Expect(machine.Labels[clusterv1.MachineControlPlaneNameLabel]).To(Equal(kcp.Name))
555+
552556
for k, v := range kcpMachineTemplateObjectMeta.Annotations {
553557
g.Expect(machine.Annotations[k]).To(Equal(v))
554558
}
555559

556560
// Verify that machineTemplate.ObjectMeta in KCP has not been modified.
557561
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.ClusterLabelName))
558562
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.MachineControlPlaneLabelName))
563+
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.MachineControlPlaneNameLabel))
559564
g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Annotations).NotTo(HaveKey(controlplanev1.KubeadmClusterConfigurationAnnotation))
560565
}
561566

internal/controllers/machine/machine_controller.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -760,10 +760,16 @@ func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool {
760760
return false
761761
}
762762

763-
// If the Machine is originated by a MachineDeployment, this prevents it from being adopted as a stand-alone Machine.
764-
// Note: this is required because after restore from a backup both the Machine controller and the
765-
// MachineSet controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
766-
if _, ok := m.Labels[clusterv1.MachineDeploymentUniqueLabel]; ok {
763+
// Note: following checks are required because after restore from a backup both the Machine controller and the
764+
// MachineSet/ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
765+
766+
// If the Machine is originated by a MachineSet, it should not be adopted directly by the Cluster as a stand-alone Machine.
767+
if _, ok := m.Labels[clusterv1.MachineSetLabelName]; ok {
768+
return false
769+
}
770+
771+
// If the Machine is originated by a ControlPlane object, it should not be adopted directly by the Cluster as a stand-alone Machine.
772+
if _, ok := m.Labels[clusterv1.MachineControlPlaneNameLabel]; ok {
767773
return false
768774
}
769775
return true

internal/controllers/machinedeployment/machinedeployment_controller.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
200200

201201
d.Labels[clusterv1.ClusterLabelName] = d.Spec.ClusterName
202202

203+
// Set the MachineDeployment as directly owned by the Cluster (if not already present).
203204
if r.shouldAdopt(d) {
204205
d.OwnerReferences = util.EnsureOwnerRef(d.OwnerReferences, metav1.OwnerReference{
205206
APIVersion: clusterv1.GroupVersion.String(),
@@ -226,6 +227,27 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
226227
return ctrl.Result{}, err
227228
}
228229

230+
// If not already present, add a label specifying the MachineDeployment name to MachineSets.
231+
// Ensure all required labels exist on the controlled MachineSets.
232+
// This logic is needed to add the `cluster.x-k8s.io/deployment-name` label to MachineSets
233+
// which were created before the `cluster.x-k8s.io/deployment-name` label was added
234+
// to all MachineSets created by a MachineDeployment or if a user manually removed the label.
235+
for idx := range msList {
236+
machineSet := msList[idx]
237+
if name, ok := machineSet.Labels[clusterv1.MachineDeploymentLabelName]; ok && name == d.Name {
238+
continue
239+
}
240+
241+
helper, err := patch.NewHelper(machineSet, r.Client)
242+
if err != nil {
243+
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to MachineSet %q", clusterv1.MachineDeploymentLabelName, machineSet.Name)
244+
}
245+
machineSet.Labels[clusterv1.MachineDeploymentLabelName] = d.Name
246+
if err := helper.Patch(ctx, machineSet); err != nil {
247+
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to MachineSet %q", clusterv1.MachineDeploymentLabelName, machineSet.Name)
248+
}
249+
}
250+
229251
if d.Spec.Paused {
230252
return ctrl.Result{}, r.sync(ctx, d, msList)
231253
}

internal/controllers/machinedeployment/machinedeployment_controller_test.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,16 @@ func TestMachineDeploymentReconciler(t *testing.T) {
203203
})
204204
}, timeout).Should(BeTrue())
205205

206-
// Verify that expected number of machines are created
207-
t.Log("Verify expected number of machines are created")
206+
t.Log("Verify MachineSet has expected replicas and version")
207+
firstMachineSet := machineSets.Items[0]
208+
g.Expect(*firstMachineSet.Spec.Replicas).To(BeEquivalentTo(2))
209+
g.Expect(*firstMachineSet.Spec.Template.Spec.Version).To(BeEquivalentTo("v1.10.3"))
210+
211+
t.Log("Verify MachineSet has expected ClusterLabelName and MachineDeploymentLabelName")
212+
g.Expect(firstMachineSet.Labels[clusterv1.ClusterLabelName]).To(Equal(testCluster.Name))
213+
g.Expect(firstMachineSet.Labels[clusterv1.MachineDeploymentLabelName]).To(Equal(deployment.Name))
214+
215+
t.Log("Verify expected number of Machines are created")
208216
machines := &clusterv1.MachineList{}
209217
g.Eventually(func() int {
210218
if err := env.List(ctx, machines, client.InNamespace(namespace.Name)); err != nil {
@@ -213,16 +221,13 @@ func TestMachineDeploymentReconciler(t *testing.T) {
213221
return len(machines.Items)
214222
}, timeout).Should(BeEquivalentTo(*deployment.Spec.Replicas))
215223

216-
// Verify that machines has MachineSetLabelName and MachineDeploymentLabelName labels
217-
t.Log("Verify machines have expected MachineSetLabelName and MachineDeploymentLabelName")
224+
t.Log("Verify Machines have expected ClusterLabelName, MachineDeploymentLabelName and MachineSetLabelName")
218225
for _, m := range machines.Items {
219226
g.Expect(m.Labels[clusterv1.ClusterLabelName]).To(Equal(testCluster.Name))
227+
g.Expect(m.Labels[clusterv1.MachineDeploymentLabelName]).To(Equal(deployment.Name))
228+
g.Expect(m.Labels[clusterv1.MachineSetLabelName]).To(Equal(firstMachineSet.Name))
220229
}
221230

222-
firstMachineSet := machineSets.Items[0]
223-
g.Expect(*firstMachineSet.Spec.Replicas).To(BeEquivalentTo(2))
224-
g.Expect(*firstMachineSet.Spec.Template.Spec.Version).To(BeEquivalentTo("v1.10.3"))
225-
226231
//
227232
// Delete firstMachineSet and expect Reconcile to be called to replace it.
228233
//
@@ -348,7 +353,7 @@ func TestMachineDeploymentReconciler(t *testing.T) {
348353
clusterv1.ClusterLabelName: testCluster.Name,
349354
}
350355

351-
t.Log("Updating MachineDeployment label")
356+
t.Log("Updating MachineDeployment labels")
352357
modifyFunc = func(d *clusterv1.MachineDeployment) {
353358
d.Spec.Selector.MatchLabels = newLabels
354359
d.Spec.Template.Labels = newLabels

internal/controllers/machinedeployment/machinedeployment_sync.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,10 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD
162162
newMS := clusterv1.MachineSet{
163163
ObjectMeta: metav1.ObjectMeta{
164164
// Make the name deterministic, to ensure idempotence
165-
Name: d.Name + "-" + apirand.SafeEncodeString(machineTemplateSpecHash),
166-
Namespace: d.Namespace,
167-
Labels: newMSTemplate.Labels,
165+
Name: d.Name + "-" + apirand.SafeEncodeString(machineTemplateSpecHash),
166+
Namespace: d.Namespace,
167+
Labels: make(map[string]string),
168+
// Note: by setting the ownerRef on creation we signal to the MachineSet controller that this is not a stand-alone MachineSet.
168169
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(d, machineDeploymentKind)},
169170
},
170171
Spec: clusterv1.MachineSetSpec{
@@ -176,6 +177,18 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, d *clusterv1.MachineD
176177
},
177178
}
178179

180+
// Set the labels from newMSTemplate as top-level labels for the new MS.
181+
// Note: We can't just set `newMSTemplate.Labels` directly and thus "share" the labels map between top-level and
182+
// .spec.template.metadata.labels. This would mean that adding the MachineDeploymentLabelName later top-level
183+
// would also add the label to .spec.template.metadata.labels.
184+
for k, v := range newMSTemplate.Labels {
185+
newMS.Labels[k] = v
186+
}
187+
188+
// Enforce that the MachineDeploymentLabelName label is set
189+
// Note: the MachineDeploymentLabelName is added by the default webhook to MachineDeployment.spec.template.labels if spec.selector is empty.
190+
newMS.Labels[clusterv1.MachineDeploymentLabelName] = d.Name
191+
179192
if d.Spec.Strategy.RollingUpdate.DeletePolicy != nil {
180193
newMS.Spec.DeletePolicy = *d.Spec.Strategy.RollingUpdate.DeletePolicy
181194
}

internal/controllers/machinedeployment/mdutil/util.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -410,9 +410,9 @@ func FindNewMachineSet(deployment *clusterv1.MachineDeployment, msList []*cluste
410410
return nil
411411
}
412412

413-
// FindOldMachineSets returns the old machine sets targeted by the given Deployment, with the given slice of MSes.
413+
// FindOldMachineSets returns the old machine sets targeted by the given Deployment, within the given slice of MSes.
414414
// Returns two list of machine sets
415-
// - the first contains all old machine sets with all non-zero replicas
415+
// - the first contains all old machine sets with non-zero replicas
416416
// - the second contains all old machine sets
417417
func FindOldMachineSets(deployment *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet) ([]*clusterv1.MachineSet, []*clusterv1.MachineSet) {
418418
var requiredMSs []*clusterv1.MachineSet

0 commit comments

Comments
 (0)