diff --git a/azure/scope/machinepool.go b/azure/scope/machinepool.go index cdec2b65d84..e4efb2b625d 100644 --- a/azure/scope/machinepool.go +++ b/azure/scope/machinepool.go @@ -406,8 +406,31 @@ func (m *MachinePoolScope) applyAzureMachinePoolMachines(ctx context.Context) er } existingMachinesByProviderID := make(map[string]infrav1exp.AzureMachinePoolMachine, len(ampms)) - for _, machine := range ampms { - existingMachinesByProviderID[machine.Spec.ProviderID] = machine + for _, ampm := range ampms { + machine, err := util.GetOwnerMachine(ctx, m.client, ampm.ObjectMeta) + if err != nil { + return fmt.Errorf("failed to find owner machine for %s/%s: %w", ampm.Namespace, ampm.Name, err) + } + + if _, ampmHasDeleteAnnotation := ampm.Annotations[clusterv1.DeleteMachineAnnotation]; !ampmHasDeleteAnnotation { + // fetch Machine delete annotation from owner machine to AzureMachinePoolMachine. + // This ensures setting a deleteMachine annotation on the Machine has an effect on the AzureMachinePoolMachine + // and the deployment strategy, in case the automatic propagation of the annotation from Machine to AzureMachinePoolMachine + // hasn't been done yet. + if machine != nil && machine.Annotations != nil { + if _, hasDeleteAnnotation := machine.Annotations[clusterv1.DeleteMachineAnnotation]; hasDeleteAnnotation { + log.V(4).Info("fetched DeleteMachineAnnotation", "machine", ampm.Spec.ProviderID) + if ampm.Annotations == nil { + ampm.Annotations = make(map[string]string) + } + ampm.Annotations[clusterv1.DeleteMachineAnnotation] = machine.Annotations[clusterv1.DeleteMachineAnnotation] + } + } + } else { + log.V(4).Info("DeleteMachineAnnotation already set") + } + + existingMachinesByProviderID[ampm.Spec.ProviderID] = ampm } // determine which machines need to be created to reflect the current state in Azure diff --git a/azure/scope/machinepool_test.go b/azure/scope/machinepool_test.go index 0c3602c6b3c..83d8b5d740c 100644 --- a/azure/scope/machinepool_test.go +++ b/azure/scope/machinepool_test.go @@ -21,6 +21,7 @@ import ( "fmt" "reflect" "testing" + "time" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2" @@ -1414,6 +1415,46 @@ func TestMachinePoolScope_applyAzureMachinePoolMachines(t *testing.T) { g.Expect(list.Items).Should(HaveLen(1)) }, }, + { + Name: "if MachinePool is not externally managed, and Machines have delete machine annotation, and overProvisionCount > 0, delete machines with deleteMachine annotation first", + Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, vmssState *azure.VMSS, cb *fake.ClientBuilder) { + mp.Spec.Replicas = ptr.To[int32](2) + + mpm1, ampm1 := getAzureMachinePoolMachineWithOwnerMachine(1) + + mpm2, ampm2 := getAzureMachinePoolMachineWithOwnerMachine(2) + mpm2.Annotations = map[string]string{ + clusterv1.DeleteMachineAnnotation: time.Now().String(), + } + + mpm3, ampm3 := getAzureMachinePoolMachineWithOwnerMachine(3) + objects := []client.Object{&mpm1, &m1, &mpm2, &m2, &mpm3, &m3} + cb.WithObjects(objects...) + + vmssState.Instances = []azure.VMSSVM{ + { + ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/1", + Name: "ampm1", + }, + { + ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/2", + Name: "ampm2", + }, + { + ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/3", + Name: "ampm3", + }, + } + }, + Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client, err error) { + g.Expect(err).NotTo(HaveOccurred()) + list := clusterv1.MachineList{} + g.Expect(c.List(ctx, &list)).NotTo(HaveOccurred()) + g.Expect(list.Items).Should(HaveLen(2)) + g.Expect(list.Items[0].Name).Should(Equal("mpm1")) + g.Expect(list.Items[1].Name).Should(Equal("mpm3")) + }, + }, { Name: "if existing MachinePool is not present, reduce replicas", Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, vmssState *azure.VMSS, cb *fake.ClientBuilder) { diff --git a/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go b/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go index 2daaac8a464..30afa56a8ab 100644 --- a/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go +++ b/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go @@ -129,6 +129,7 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co } }() log = ctrl.LoggerFrom(ctx).V(4) + deleteAnnotatedMachines = order(getDeleteAnnotatedMachines(machinesByProviderID)) failedMachines = order(getFailedMachines(machinesByProviderID)) deletingMachines = order(getDeletingMachines(machinesByProviderID)) readyMachines = order(getReadyMachines(machinesByProviderID)) @@ -143,12 +144,10 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co }() ) - // Order AzureMachinePoolMachines with the clutserv1.DeleteMachineAnnotation to the front so that they have delete priority. + // Order AzureMachinePoolMachines with the clusterv1.DeleteMachineAnnotation to the front so that they have delete priority. // This allows MachinePool Machines to work with the autoscaler. failedMachines = orderByDeleteMachineAnnotation(failedMachines) deletingMachines = orderByDeleteMachineAnnotation(deletingMachines) - readyMachines = orderByDeleteMachineAnnotation(readyMachines) - machinesWithoutLatestModel = orderByDeleteMachineAnnotation(machinesWithoutLatestModel) log.Info("selecting machines to delete", "readyMachines", len(readyMachines), @@ -156,6 +155,7 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co "maxUnavailable", maxUnavailable, "disruptionBudget", disruptionBudget, "machinesWithoutTheLatestModel", len(machinesWithoutLatestModel), + "deleteAnnotatedMachines", len(deleteAnnotatedMachines), "failedMachines", len(failedMachines), "deletingMachines", len(deletingMachines), ) @@ -166,6 +166,12 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co return append(failedMachines, deletingMachines...), nil } + // if we have machines annotated with delete machine, remove them + if len(deleteAnnotatedMachines) > 0 { + log.Info("delete annotated machines", "desiredReplicaCount", desiredReplicaCount, "maxUnavailable", maxUnavailable, "deleteAnnotatedMachines", getProviderIDs(deleteAnnotatedMachines)) + return deleteAnnotatedMachines, nil + } + // if we have not yet reached our desired count, don't try to delete anything if len(readyMachines) < int(desiredReplicaCount) { log.Info("not enough ready machines", "desiredReplicaCount", desiredReplicaCount, "readyMachinesCount", len(readyMachines), "machinesByProviderID", len(machinesByProviderID)) @@ -224,6 +230,18 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co return toDelete, nil } +func getDeleteAnnotatedMachines(machinesByProviderID map[string]infrav1exp.AzureMachinePoolMachine) []infrav1exp.AzureMachinePoolMachine { + var machines []infrav1exp.AzureMachinePoolMachine + for _, v := range machinesByProviderID { + if v.Annotations != nil { + if _, hasDeleteAnnotation := v.Annotations[clusterv1.DeleteMachineAnnotation]; hasDeleteAnnotation { + machines = append(machines, v) + } + } + } + return machines +} + func getFailedMachines(machinesByProviderID map[string]infrav1exp.AzureMachinePoolMachine) []infrav1exp.AzureMachinePoolMachine { var machines []infrav1exp.AzureMachinePoolMachine for _, v := range machinesByProviderID { diff --git a/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy_test.go b/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy_test.go index 3ea709e7682..0d429f87b4c 100644 --- a/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy_test.go +++ b/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy_test.go @@ -239,7 +239,7 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) { }), }, { - name: "if over-provisioned and has delete machine annotation, select machines those first and then by oldest", + name: "if machine has delete machine annotation, select those machines first", strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}), desiredReplicas: 2, input: map[string]infrav1exp.AzureMachinePoolMachine{ @@ -250,7 +250,6 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) { }, want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{ makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}), - makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}), }), }, { @@ -268,21 +267,6 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) { makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}), }), }, - { - name: "if over-provisioned and has delete machine annotation, prioritize those machines first over creation date", - strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}), - desiredReplicas: 2, - input: map[string]infrav1exp.AzureMachinePoolMachine{ - "foo": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}), - "bin": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}), - "baz": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}), - "bar": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}), - }, - want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{ - makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}), - makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}), - }), - }, { name: "if over-provisioned, select machines ordered by newest first", strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.NewestDeletePolicyType}), @@ -298,21 +282,6 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) { makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour))}), }), }, - { - name: "if over-provisioned and has delete machine annotation, select those machines first followed by newest", - strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.NewestDeletePolicyType}), - desiredReplicas: 2, - input: map[string]infrav1exp.AzureMachinePoolMachine{ - "foo": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}), - "bin": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour))}), - "baz": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}), - "bar": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour)), HasDeleteMachineAnnotation: true}), - }, - want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{ - makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour)), HasDeleteMachineAnnotation: true}), - makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}), - }), - }, { name: "if over-provisioned but with an equivalent number marked for deletion, nothing to do; this is the case where Azure has not yet caught up to capz", strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}),