Skip to content

Commit b074410

Browse files
committed
delete annotated machines first
In certain circumstances the deleteMachine annotation isn't yet propagated from the Machine to the AzureMachinePoolMachine. This can result in deleting the wrong machines. This change ensures the owner Machine's deleteMachine annotation is always considered and those machines are deleted first.
1 parent 57ea5b8 commit b074410

File tree

4 files changed

+87
-53
lines changed

4 files changed

+87
-53
lines changed

azure/scope/machinepool.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -406,8 +406,31 @@ func (m *MachinePoolScope) applyAzureMachinePoolMachines(ctx context.Context) er
406406
}
407407

408408
existingMachinesByProviderID := make(map[string]infrav1exp.AzureMachinePoolMachine, len(ampms))
409-
for _, machine := range ampms {
410-
existingMachinesByProviderID[machine.Spec.ProviderID] = machine
409+
for _, ampm := range ampms {
410+
machine, err := util.GetOwnerMachine(ctx, m.client, ampm.ObjectMeta)
411+
if err != nil {
412+
return fmt.Errorf("failed to find owner machine for %s/%s: %w", ampm.Namespace, ampm.Name, err)
413+
}
414+
415+
if _, ampmHasDeleteAnnotation := ampm.Annotations[clusterv1.DeleteMachineAnnotation]; !ampmHasDeleteAnnotation {
416+
// fetch Machine delete annotation from owner machine to AzureMachinePoolMachine.
417+
// This ensures setting a deleteMachine annotation on the Machine has an effect on the AzureMachinePoolMachine
418+
// and the deployment strategy, in case the automatic propagation of the annotation from Machine to AzureMachinePoolMachine
419+
// hasn't been done yet.
420+
if machine != nil && machine.Annotations != nil {
421+
if _, hasDeleteAnnotation := machine.Annotations[clusterv1.DeleteMachineAnnotation]; hasDeleteAnnotation {
422+
log.V(4).Info("fetched DeleteMachineAnnotation", "machine", ampm.Spec.ProviderID)
423+
if ampm.Annotations == nil {
424+
ampm.Annotations = make(map[string]string)
425+
}
426+
ampm.Annotations[clusterv1.DeleteMachineAnnotation] = machine.Annotations[clusterv1.DeleteMachineAnnotation]
427+
}
428+
}
429+
} else {
430+
log.V(4).Info("DeleteMachineAnnotation already set")
431+
}
432+
433+
existingMachinesByProviderID[ampm.Spec.ProviderID] = ampm
411434
}
412435

413436
// determine which machines need to be created to reflect the current state in Azure

azure/scope/machinepool_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"reflect"
2323
"testing"
24+
"time"
2425

2526
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
2627
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2"
@@ -1414,6 +1415,46 @@ func TestMachinePoolScope_applyAzureMachinePoolMachines(t *testing.T) {
14141415
g.Expect(list.Items).Should(HaveLen(1))
14151416
},
14161417
},
1418+
{
1419+
Name: "if MachinePool is not externally managed, and Machines have delete machine annotation, and overProvisionCount > 0, delete machines with deleteMachine annotation first",
1420+
Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, vmssState *azure.VMSS, cb *fake.ClientBuilder) {
1421+
mp.Spec.Replicas = ptr.To[int32](2)
1422+
1423+
mpm1, ampm1 := getAzureMachinePoolMachineWithOwnerMachine(1)
1424+
1425+
mpm2, ampm2 := getAzureMachinePoolMachineWithOwnerMachine(2)
1426+
mpm2.Annotations = map[string]string{
1427+
clusterv1.DeleteMachineAnnotation: time.Now().String(),
1428+
}
1429+
1430+
mpm3, ampm3 := getAzureMachinePoolMachineWithOwnerMachine(3)
1431+
objects := []client.Object{&mpm1, &ampm1, &mpm2, &ampm2, &mpm3, &ampm3}
1432+
cb.WithObjects(objects...)
1433+
1434+
vmssState.Instances = []azure.VMSSVM{
1435+
{
1436+
ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/1",
1437+
Name: "ampm1",
1438+
},
1439+
{
1440+
ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/2",
1441+
Name: "ampm2",
1442+
},
1443+
{
1444+
ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/3",
1445+
Name: "ampm3",
1446+
},
1447+
}
1448+
},
1449+
Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client, err error) {
1450+
g.Expect(err).NotTo(HaveOccurred())
1451+
list := clusterv1.MachineList{}
1452+
g.Expect(c.List(ctx, &list)).NotTo(HaveOccurred())
1453+
g.Expect(list.Items).Should(HaveLen(2))
1454+
g.Expect(list.Items[0].Name).Should(Equal("mpm1"))
1455+
g.Expect(list.Items[1].Name).Should(Equal("mpm3"))
1456+
},
1457+
},
14171458
{
14181459
Name: "if existing MachinePool is not present, reduce replicas",
14191460
Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, vmssState *azure.VMSS, cb *fake.ClientBuilder) {

azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co
129129
}
130130
}()
131131
log = ctrl.LoggerFrom(ctx).V(4)
132+
deleteAnnotatedMachines = order(getDeleteAnnotatedMachines(machinesByProviderID))
132133
failedMachines = order(getFailedMachines(machinesByProviderID))
133134
deletingMachines = order(getDeletingMachines(machinesByProviderID))
134135
readyMachines = order(getReadyMachines(machinesByProviderID))
@@ -143,23 +144,23 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co
143144
}()
144145
)
145146

146-
// Order AzureMachinePoolMachines with the clutserv1.DeleteMachineAnnotation to the front so that they have delete priority.
147-
// This allows MachinePool Machines to work with the autoscaler.
148-
failedMachines = orderByDeleteMachineAnnotation(failedMachines)
149-
deletingMachines = orderByDeleteMachineAnnotation(deletingMachines)
150-
readyMachines = orderByDeleteMachineAnnotation(readyMachines)
151-
machinesWithoutLatestModel = orderByDeleteMachineAnnotation(machinesWithoutLatestModel)
152-
153147
log.Info("selecting machines to delete",
154148
"readyMachines", len(readyMachines),
155149
"desiredReplicaCount", desiredReplicaCount,
156150
"maxUnavailable", maxUnavailable,
157151
"disruptionBudget", disruptionBudget,
158152
"machinesWithoutTheLatestModel", len(machinesWithoutLatestModel),
153+
"deleteAnnotatedMachines", len(deleteAnnotatedMachines),
159154
"failedMachines", len(failedMachines),
160155
"deletingMachines", len(deletingMachines),
161156
)
162157

158+
// if we have machines annotated with delete machine, remove them
159+
if len(deleteAnnotatedMachines) > 0 {
160+
log.Info("delete annotated machines", "desiredReplicaCount", desiredReplicaCount, "maxUnavailable", maxUnavailable, "deleteAnnotatedMachines", getProviderIDs(deleteAnnotatedMachines))
161+
return deleteAnnotatedMachines, nil
162+
}
163+
163164
// if we have failed or deleting machines, remove them
164165
if len(failedMachines) > 0 || len(deletingMachines) > 0 {
165166
log.Info("failed or deleting machines", "desiredReplicaCount", desiredReplicaCount, "maxUnavailable", maxUnavailable, "failedMachines", getProviderIDs(failedMachines), "deletingMachines", getProviderIDs(deletingMachines))
@@ -224,6 +225,18 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co
224225
return toDelete, nil
225226
}
226227

228+
func getDeleteAnnotatedMachines(machinesByProviderID map[string]infrav1exp.AzureMachinePoolMachine) []infrav1exp.AzureMachinePoolMachine {
229+
var machines []infrav1exp.AzureMachinePoolMachine
230+
for _, v := range machinesByProviderID {
231+
if v.Annotations != nil {
232+
if _, hasDeleteAnnotation := v.Annotations[clusterv1.DeleteMachineAnnotation]; hasDeleteAnnotation {
233+
machines = append(machines, v)
234+
}
235+
}
236+
}
237+
return machines
238+
}
239+
227240
func getFailedMachines(machinesByProviderID map[string]infrav1exp.AzureMachinePoolMachine) []infrav1exp.AzureMachinePoolMachine {
228241
var machines []infrav1exp.AzureMachinePoolMachine
229242
for _, v := range machinesByProviderID {
@@ -304,18 +317,6 @@ func orderRandom(machines []infrav1exp.AzureMachinePoolMachine) []infrav1exp.Azu
304317
return machines
305318
}
306319

307-
// orderByDeleteMachineAnnotation will sort AzureMachinePoolMachines with the clusterv1.DeleteMachineAnnotation to the front of the list.
308-
// It will preserve the existing order of the list otherwise so that it respects the existing delete priority otherwise.
309-
func orderByDeleteMachineAnnotation(machines []infrav1exp.AzureMachinePoolMachine) []infrav1exp.AzureMachinePoolMachine {
310-
sort.SliceStable(machines, func(i, j int) bool {
311-
_, iHasAnnotation := machines[i].Annotations[clusterv1.DeleteMachineAnnotation]
312-
313-
return iHasAnnotation
314-
})
315-
316-
return machines
317-
}
318-
319320
func getProviderIDs(machines []infrav1exp.AzureMachinePoolMachine) []string {
320321
ids := make([]string, len(machines))
321322
for i, machine := range machines {

azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy_test.go

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) {
239239
}),
240240
},
241241
{
242-
name: "if over-provisioned and has delete machine annotation, select machines those first and then by oldest",
242+
name: "if machine has delete machine annotation, select those machines first",
243243
strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}),
244244
desiredReplicas: 2,
245245
input: map[string]infrav1exp.AzureMachinePoolMachine{
@@ -250,7 +250,6 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) {
250250
},
251251
want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{
252252
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}),
253-
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}),
254253
}),
255254
},
256255
{
@@ -268,21 +267,6 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) {
268267
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}),
269268
}),
270269
},
271-
{
272-
name: "if over-provisioned and has delete machine annotation, prioritize those machines first over creation date",
273-
strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}),
274-
desiredReplicas: 2,
275-
input: map[string]infrav1exp.AzureMachinePoolMachine{
276-
"foo": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}),
277-
"bin": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}),
278-
"baz": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}),
279-
"bar": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}),
280-
},
281-
want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{
282-
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}),
283-
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}),
284-
}),
285-
},
286270
{
287271
name: "if over-provisioned, select machines ordered by newest first",
288272
strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.NewestDeletePolicyType}),
@@ -298,21 +282,6 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) {
298282
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour))}),
299283
}),
300284
},
301-
{
302-
name: "if over-provisioned and has delete machine annotation, select those machines first followed by newest",
303-
strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.NewestDeletePolicyType}),
304-
desiredReplicas: 2,
305-
input: map[string]infrav1exp.AzureMachinePoolMachine{
306-
"foo": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}),
307-
"bin": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour))}),
308-
"baz": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}),
309-
"bar": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour)), HasDeleteMachineAnnotation: true}),
310-
},
311-
want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{
312-
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour)), HasDeleteMachineAnnotation: true}),
313-
makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}),
314-
}),
315-
},
316285
{
317286
name: "if over-provisioned but with an equivalent number marked for deletion, nothing to do; this is the case where Azure has not yet caught up to capz",
318287
strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}),

0 commit comments

Comments
 (0)