Skip to content

Commit 0b71e08

Browse files
committed
delete annotated machines before other machines
In certain circumstances the deleteMachine annotation isn't yet propagated from the Machine to the AzureMachinePoolMachine. This can result in deleting the wrong machines. This change ensures the owner Machine's deleteMachine annotation is always considered and those machines are deleted before other kind of machines.
1 parent 711d861 commit 0b71e08

File tree

4 files changed

+88
-37
lines changed

4 files changed

+88
-37
lines changed

azure/scope/machinepool.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -406,8 +406,31 @@ func (m *MachinePoolScope) applyAzureMachinePoolMachines(ctx context.Context) er
406406
}
407407

408408
existingMachinesByProviderID := make(map[string]infrav1exp.AzureMachinePoolMachine, len(ampms))
409-
for _, machine := range ampms {
410-
existingMachinesByProviderID[machine.Spec.ProviderID] = machine
409+
for _, ampm := range ampms {
410+
machine, err := util.GetOwnerMachine(ctx, m.client, ampm.ObjectMeta)
411+
if err != nil {
412+
return fmt.Errorf("failed to find owner machine for %s/%s: %w", ampm.Namespace, ampm.Name, err)
413+
}
414+
415+
if _, ampmHasDeleteAnnotation := ampm.Annotations[clusterv1.DeleteMachineAnnotation]; !ampmHasDeleteAnnotation {
416+
// fetch Machine delete annotation from owner machine to AzureMachinePoolMachine.
417+
// This ensures setting a deleteMachine annotation on the Machine has an effect on the AzureMachinePoolMachine
418+
// and the deployment strategy, in case the automatic propagation of the annotation from Machine to AzureMachinePoolMachine
419+
// hasn't been done yet.
420+
if machine != nil && machine.Annotations != nil {
421+
if _, hasDeleteAnnotation := machine.Annotations[clusterv1.DeleteMachineAnnotation]; hasDeleteAnnotation {
422+
log.V(4).Info("fetched DeleteMachineAnnotation", "machine", ampm.Spec.ProviderID)
423+
if ampm.Annotations == nil {
424+
ampm.Annotations = make(map[string]string)
425+
}
426+
ampm.Annotations[clusterv1.DeleteMachineAnnotation] = machine.Annotations[clusterv1.DeleteMachineAnnotation]
427+
}
428+
}
429+
} else {
430+
log.V(4).Info("DeleteMachineAnnotation already set")
431+
}
432+
433+
existingMachinesByProviderID[ampm.Spec.ProviderID] = ampm
411434
}
412435

413436
// determine which machines need to be created to reflect the current state in Azure

azure/scope/machinepool_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"reflect"
2323
"testing"
24+
"time"
2425

2526
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
2627
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2"
@@ -1414,6 +1415,46 @@ func TestMachinePoolScope_applyAzureMachinePoolMachines(t *testing.T) {
14141415
g.Expect(list.Items).Should(HaveLen(1))
14151416
},
14161417
},
1418+
{
1419+
Name: "if MachinePool is not externally managed, and Machines have delete machine annotation, and overProvisionCount > 0, delete machines with deleteMachine annotation first",
1420+
Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, vmssState *azure.VMSS, cb *fake.ClientBuilder) {
1421+
mp.Spec.Replicas = ptr.To[int32](2)
1422+
1423+
mpm1, ampm1 := getAzureMachinePoolMachineWithOwnerMachine(1)
1424+
1425+
mpm2, ampm2 := getAzureMachinePoolMachineWithOwnerMachine(2)
1426+
mpm2.Annotations = map[string]string{
1427+
clusterv1.DeleteMachineAnnotation: time.Now().String(),
1428+
}
1429+
1430+
mpm3, ampm3 := getAzureMachinePoolMachineWithOwnerMachine(3)
1431+
objects := []client.Object{&mpm1, &ampm1, &mpm2, &ampm2, &mpm3, &ampm3}
1432+
cb.WithObjects(objects...)
1433+
1434+
vmssState.Instances = []azure.VMSSVM{
1435+
{
1436+
ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/1",
1437+
Name: "ampm1",
1438+
},
1439+
{
1440+
ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/2",
1441+
Name: "ampm2",
1442+
},
1443+
{
1444+
ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/3",
1445+
Name: "ampm3",
1446+
},
1447+
}
1448+
},
1449+
Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client, err error) {
1450+
g.Expect(err).NotTo(HaveOccurred())
1451+
list := clusterv1.MachineList{}
1452+
g.Expect(c.List(ctx, &list)).NotTo(HaveOccurred())
1453+
g.Expect(list.Items).Should(HaveLen(2))
1454+
g.Expect(list.Items[0].Name).Should(Equal("mpm1"))
1455+
g.Expect(list.Items[1].Name).Should(Equal("mpm3"))
1456+
},
1457+
},
14171458
{
14181459
Name: "if existing MachinePool is not present, reduce replicas",
14191460
Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, vmssState *azure.VMSS, cb *fake.ClientBuilder) {

azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co
129129
}
130130
}()
131131
log = ctrl.LoggerFrom(ctx).V(4)
132+
deleteAnnotatedMachines = order(getDeleteAnnotatedMachines(machinesByProviderID))
132133
failedMachines = order(getFailedMachines(machinesByProviderID))
133134
deletingMachines = order(getDeletingMachines(machinesByProviderID))
134135
readyMachines = order(getReadyMachines(machinesByProviderID))
@@ -143,19 +144,18 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co
143144
}()
144145
)
145146

146-
// Order AzureMachinePoolMachines with the clutserv1.DeleteMachineAnnotation to the front so that they have delete priority.
147+
// Order AzureMachinePoolMachines with the clusterv1.DeleteMachineAnnotation to the front so that they have delete priority.
147148
// This allows MachinePool Machines to work with the autoscaler.
148149
failedMachines = orderByDeleteMachineAnnotation(failedMachines)
149150
deletingMachines = orderByDeleteMachineAnnotation(deletingMachines)
150-
readyMachines = orderByDeleteMachineAnnotation(readyMachines)
151-
machinesWithoutLatestModel = orderByDeleteMachineAnnotation(machinesWithoutLatestModel)
152151

153152
log.Info("selecting machines to delete",
154153
"readyMachines", len(readyMachines),
155154
"desiredReplicaCount", desiredReplicaCount,
156155
"maxUnavailable", maxUnavailable,
157156
"disruptionBudget", disruptionBudget,
158157
"machinesWithoutTheLatestModel", len(machinesWithoutLatestModel),
158+
"deleteAnnotatedMachines", len(deleteAnnotatedMachines),
159159
"failedMachines", len(failedMachines),
160160
"deletingMachines", len(deletingMachines),
161161
)
@@ -166,6 +166,12 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co
166166
return append(failedMachines, deletingMachines...), nil
167167
}
168168

169+
// if we have machines annotated with delete machine, remove them
170+
if len(deleteAnnotatedMachines) > 0 {
171+
log.Info("delete annotated machines", "desiredReplicaCount", desiredReplicaCount, "maxUnavailable", maxUnavailable, "deleteAnnotatedMachines", getProviderIDs(deleteAnnotatedMachines))
172+
return deleteAnnotatedMachines, nil
173+
}
174+
169175
// if we have not yet reached our desired count, don't try to delete anything
170176
if len(readyMachines) < int(desiredReplicaCount) {
171177
log.Info("not enough ready machines", "desiredReplicaCount", desiredReplicaCount, "readyMachinesCount", len(readyMachines), "machinesByProviderID", len(machinesByProviderID))
@@ -224,6 +230,18 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co
224230
return toDelete, nil
225231
}
226232

233+
func getDeleteAnnotatedMachines(machinesByProviderID map[string]infrav1exp.AzureMachinePoolMachine) []infrav1exp.AzureMachinePoolMachine {
234+
var machines []infrav1exp.AzureMachinePoolMachine
235+
for _, v := range machinesByProviderID {
236+
if v.Annotations != nil {
237+
if _, hasDeleteAnnotation := v.Annotations[clusterv1.DeleteMachineAnnotation]; hasDeleteAnnotation {
238+
machines = append(machines, v)
239+
}
240+
}
241+
}
242+
return machines
243+
}
244+
227245
func getFailedMachines(machinesByProviderID map[string]infrav1exp.AzureMachinePoolMachine) []infrav1exp.AzureMachinePoolMachine {
228246
var machines []infrav1exp.AzureMachinePoolMachine
229247
for _, v := range machinesByProviderID {

azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy_test.go

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) {
239239
}),
240240
},
241241
{
242-
name: "if over-provisioned and has delete machine annotation, select machines those first and then by oldest",
242+
name: "if machine has delete machine annotation, select those machines first",
243243
strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}),
244244
desiredReplicas: 2,
245245
input: map[string]infrav1exp.AzureMachinePoolMachine{
@@ -250,7 +250,6 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) {
250250
},
251251
want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{
252252
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}),
253-
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}),
254253
}),
255254
},
256255
{
@@ -268,21 +267,6 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) {
268267
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}),
269268
}),
270269
},
271-
{
272-
name: "if over-provisioned and has delete machine annotation, prioritize those machines first over creation date",
273-
strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}),
274-
desiredReplicas: 2,
275-
input: map[string]infrav1exp.AzureMachinePoolMachine{
276-
"foo": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}),
277-
"bin": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}),
278-
"baz": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}),
279-
"bar": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}),
280-
},
281-
want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{
282-
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}),
283-
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}),
284-
}),
285-
},
286270
{
287271
name: "if over-provisioned, select machines ordered by newest first",
288272
strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.NewestDeletePolicyType}),
@@ -298,21 +282,6 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) {
298282
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour))}),
299283
}),
300284
},
301-
{
302-
name: "if over-provisioned and has delete machine annotation, select those machines first followed by newest",
303-
strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.NewestDeletePolicyType}),
304-
desiredReplicas: 2,
305-
input: map[string]infrav1exp.AzureMachinePoolMachine{
306-
"foo": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}),
307-
"bin": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour))}),
308-
"baz": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}),
309-
"bar": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour)), HasDeleteMachineAnnotation: true}),
310-
},
311-
want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{
312-
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour)), HasDeleteMachineAnnotation: true}),
313-
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}),
314-
}),
315-
},
316285
{
317286
name: "if over-provisioned but with an equivalent number marked for deletion, nothing to do; this is the case where Azure has not yet caught up to capz",
318287
strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}),

0 commit comments

Comments
 (0)