diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling.go b/internal/controllers/machinedeployment/machinedeployment_rolling.go index 83bd72f70d6e..d2e4a8556527 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling.go @@ -59,7 +59,7 @@ func (r *Reconciler) rolloutRolling(ctx context.Context, md *clusterv1.MachineDe planner.newMS = newMS planner.oldMSs = oldMSs - if err := planner.Plan(ctx); err != nil { + if err := planner.planRolloutRolling(ctx); err != nil { return err } @@ -100,8 +100,8 @@ func newRolloutPlanner() *rolloutPlanner { } } -// Plan determine how to proceed with the rollout if we are not yet at the desired state. -func (p *rolloutPlanner) Plan(ctx context.Context) error { +// planRolloutRolling determine how to proceed with the rollout when using the RolloutRolling strategy if the system is not yet at the desired state. +func (p *rolloutPlanner) planRolloutRolling(ctx context.Context) error { if p.md.Spec.Replicas == nil { return errors.Errorf("spec.replicas for MachineDeployment %v is nil, this is unexpected", client.ObjectKeyFromObject(p.md)) } @@ -122,7 +122,26 @@ func (p *rolloutPlanner) Plan(ctx context.Context) error { } // Scale down, if we can. - return p.reconcileOldMachineSets(ctx) + if err := p.reconcileOldMachineSetsRolloutRolling(ctx); err != nil { + return err + } + + // This func tries to detect and address the case when a rollout is not making progress because both scaling down and scaling up are blocked. + // Note: This func must be called after computing scale up/down intent for all the MachineSets. + // Note: This func only addresses deadlocks due to unavailable replicas not getting deleted on oldMSs, which can happen + // because reconcileOldMachineSetsRolloutRolling called above always assumes the worst case when deleting replicas e.g. + // - MD with spec.replicas 3, MaxSurge 1, MaxUnavailable 0 + // - OldMS with 3 replicas, 2 available replica (and thus 1 unavailable replica) + // - NewMS with 1 replica, 1 available replica + // - In theory it is possible to scale down oldMS from 3->2 replicas by deleting the unavailable replica. + // - However, reconcileOldMachineSetsRolloutRolling cannot assume that the MachineSet controller is going to delete + // the unavailable replica when scaling down from 3->2, because it might happen that one of the available replicas + // is deleted instead. + // - As a consequence reconcileOldMachineSetsRolloutRolling, which assumes the worst case when deleting replicas, did not scaled down oldMS. + // This situation, rollout not proceeding due to unavailable replicas, is considered a deadlock to be addressed by reconcileDeadlockBreaker. + // Note: Unblocking deadlocks when unavailable replicas exist only on oldMSs, is required also because replicas on oldMSs are not remediated by MHC. + p.reconcileDeadlockBreaker(ctx) + return nil } func (p *rolloutPlanner) reconcileNewMachineSet(ctx context.Context) error { @@ -159,205 +178,226 @@ func (p *rolloutPlanner) reconcileNewMachineSet(ctx context.Context) error { return nil } -func (p *rolloutPlanner) reconcileOldMachineSets(ctx context.Context) error { - log := ctrl.LoggerFrom(ctx) +func (p *rolloutPlanner) reconcileOldMachineSetsRolloutRolling(ctx context.Context) error { + allMSs := append(p.oldMSs, p.newMS) - oldMachinesCount := mdutil.GetReplicaCountForMachineSets(p.oldMSs) - if oldMachinesCount == 0 { - // Can't scale down further + // no op if there are no replicas on old machinesets + if mdutil.GetReplicaCountForMachineSets(p.oldMSs) == 0 { return nil } - newMSReplicas := ptr.Deref(p.newMS.Spec.Replicas, 0) - if v, ok := p.scaleIntents[p.newMS.Name]; ok { - newMSReplicas = v - } - allMachinesCount := oldMachinesCount + newMSReplicas - log.V(4).Info("New MachineSet has available machines", - "machineset", client.ObjectKeyFromObject(p.newMS).String(), "available-replicas", ptr.Deref(p.newMS.Status.AvailableReplicas, 0)) maxUnavailable := mdutil.MaxUnavailable(*p.md) + minAvailable := max(ptr.Deref(p.md.Spec.Replicas, 0)-maxUnavailable, 0) - // Check if we can scale down. We can scale down in the following 2 cases: - // * Some old MachineSets have unhealthy replicas, we could safely scale down those unhealthy replicas since that won't further - // increase unavailability. - // * New MachineSet has scaled up and it's replicas becomes ready, then we can scale down old MachineSets in a further step. - // - // maxScaledDown := allMachinesCount - minAvailable - newMachineSetMachinesUnavailable - // take into account not only maxUnavailable and any surge machines that have been created, but also unavailable machines from - // the newMS, so that the unavailable machines from the newMS would not make us scale down old MachineSets in a further - // step(that will increase unavailability). - // - // Concrete example: - // - // * 10 replicas - // * 2 maxUnavailable (absolute number, not percent) - // * 3 maxSurge (absolute number, not percent) - // - // case 1: - // * Deployment is updated, newMS is created with 3 replicas, oldMS is scaled down to 8, and newMS is scaled up to 5. - // * The new MachineSet machines crashloop and never become available. - // * allMachinesCount is 13. minAvailable is 8. newMSMachinesUnavailable is 5. - // * A node fails and causes one of the oldMS machines to become unavailable. However, 13 - 8 - 5 = 0, so the oldMS won't be scaled down. - // * The user notices the crashloop and does kubectl rollout undo to rollback. - // * newMSMachinesUnavailable is 1, since we rolled back to the good MachineSet, so maxScaledDown = 13 - 8 - 1 = 4. 4 of the crashlooping machines will be scaled down. - // * The total number of machines will then be 9 and the newMS can be scaled up to 10. - // - // case 2: - // Same example, but pushing a new machine template instead of rolling back (aka "roll over"): - // * The new MachineSet created must start with 0 replicas because allMachinesCount is already at 13. - // * However, newMSMachinesUnavailable would also be 0, so the 2 old MachineSets could be scaled down by 5 (13 - 8 - 0), which would then - // allow the new MachineSet to be scaled up by 5. - availableReplicas := ptr.Deref(p.newMS.Status.AvailableReplicas, 0) - - minAvailable := *(p.md.Spec.Replicas) - maxUnavailable - newMSUnavailableMachineCount := newMSReplicas - availableReplicas - maxScaledDown := allMachinesCount - minAvailable - newMSUnavailableMachineCount - if maxScaledDown <= 0 { - return nil + totalSpecReplicas := mdutil.GetReplicaCountForMachineSets(allMSs) + + // Find the total number of available replicas. + totalAvailableReplicas := ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(allMSs), 0) + + // Find the number of pending scale down from previous reconcile/from current reconcile. + // This is required because whenever the system is reducing the number of replicas, this operation could further impact availability e.g. + // - in case of regular rollouts, there is no certainty about which replica is going to be deleted (and if the replica being deleted is currently available or not): + // - e.g. MS controller is going to first delete replicas with deletion annotation; also MS controller has a slightly different notion of unavailable as of now. + // - in case of in-place rollout, in-place updates are always assumed as impacting availability (they can always fail). + totalPendingScaleDown := int32(0) + for _, ms := range allMSs { + scaleIntent := ptr.Deref(ms.Spec.Replicas, 0) + if v, ok := p.scaleIntents[ms.Name]; ok { + scaleIntent = min(scaleIntent, v) + } + + // NOTE: Count only pending scale down from the current status.replicas (so scale down of existing replicas). + if scaleIntent < ptr.Deref(ms.Status.Replicas, 0) { + totalPendingScaleDown += max(ptr.Deref(ms.Status.Replicas, 0)-scaleIntent, 0) + } } - // Clean up unhealthy replicas first, otherwise unhealthy replicas will block deployment - // and cause timeout. See https://github.com/kubernetes/kubernetes/issues/16737 - cleanupCount, err := p.cleanupUnhealthyReplicas(ctx, maxScaledDown) - if err != nil { - return err + // Compute the total number of replicas that can be scaled down. + // Exit immediately if there is no room for scaling down. + // NOTE: this is a quick preliminary check to verify if there is room for scaling down any of the oldMSs; further down the code + // will make additional checks to ensure scale down actually happens without breaching MaxUnavailable, and + // if necessary, it will reduce the extent of the scale down accordingly. + totalScaleDownCount := max(totalSpecReplicas-totalPendingScaleDown-minAvailable, 0) + if totalScaleDownCount <= 0 { + return nil } - log.V(4).Info("Cleaned up unhealthy replicas from old MachineSets", "count", cleanupCount) + // Sort oldMSs so the system will start deleting from the oldest MS first. + sort.Sort(mdutil.MachineSetsByCreationTimestamp(p.oldMSs)) - // Scale down old MachineSets, need check maxUnavailable to ensure we can scale down - scaledDownCount, err := p.scaleDownOldMachineSetsForRollingUpdate(ctx) - if err != nil { - return err - } + // Scale down only unavailable replicas / up to residual totalScaleDownCount. + // NOTE: The system must scale down unavailable replicas first in order to increase chances for the rollout to progress. + // However, rollout planner must also take into account the fact that the MS controller might have a different opinion on + // which replica to delete when a scale down happens. + // + // As a consequence, the rollout planner cannot assume a scale down operation deletes an unavailable replica. e.g. + // - MD with spec.replicas 3, MaxSurge 1, MaxUnavailable 0 + // - OldMS with 3 replicas, 2 available replica (and thus 1 unavailable replica) + // - NewMS with 1 replica, 1 available replica + // - In theory it is possible to scale down oldMS from 3->2 replicas by deleting the unavailable replica. + // - However, rollout planner cannot assume that the MachineSet controller is going to delete + // the unavailable replica when scaling down from 3->2, because it might happen that one of the available replicas + // is deleted instead. + // + // In the example above, the scaleDownOldMSs should not scale down OldMS from 3->2 replicas. + // In other use cases, e.g. when scaling down an oldMS from 5->3 replicas, the scaleDownOldMSs should limit scale down extent + // only partially if this doesn't breach MaxUnavailable (e.g. scale down 5->4 instead of 5->3). + totalScaleDownCount, totalAvailableReplicas = p.scaleDownOldMSs(ctx, totalScaleDownCount, totalAvailableReplicas, minAvailable, true) + + // Then scale down old MS down to zero replicas / down to residual totalScaleDownCount. + // NOTE: Also in this case, we should continuously assess if reducing the number of replicase could further impact availability, + // and if necessary, limit scale down extent to ensure the operation respects MaxUnavailable limits. + _, _ = p.scaleDownOldMSs(ctx, totalScaleDownCount, totalAvailableReplicas, minAvailable, false) - log.V(4).Info("Scaled down old MachineSets of MachineDeployment", "count", scaledDownCount) return nil } -// cleanupUnhealthyReplicas will scale down old MachineSets with unhealthy replicas, so that all unhealthy replicas will be deleted. -func (p *rolloutPlanner) cleanupUnhealthyReplicas(ctx context.Context, maxCleanupCount int32) (int32, error) { +func (p *rolloutPlanner) scaleDownOldMSs(ctx context.Context, totalScaleDownCount, totalAvailableReplicas, minAvailable int32, scaleDownOnlyUnavailableReplicas bool) (int32, int32) { log := ctrl.LoggerFrom(ctx) - sort.Sort(mdutil.MachineSetsByCreationTimestamp(p.oldMSs)) - - // Scale down all old MachineSets with any unhealthy replicas. MachineSet will honour spec.deletion.order - // for deleting Machines. Machines with a deletion timestamp, with a failure message or without a nodeRef - // are preferred for all strategies. - // This results in a best effort to remove machines backing unhealthy nodes. - totalScaledDown := int32(0) - for _, oldMS := range p.oldMSs { - if oldMS.Spec.Replicas == nil { - return 0, errors.Errorf("spec.replicas for MachineSet %v is nil, this is unexpected", client.ObjectKeyFromObject(oldMS)) + // No op if there is no scaling down left. + if totalScaleDownCount <= 0 { + break } - if totalScaledDown >= maxCleanupCount { - break + replicas := ptr.Deref(oldMS.Spec.Replicas, 0) + if v, ok := p.scaleIntents[oldMS.Name]; ok { + replicas = v } - oldMSReplicas := *(oldMS.Spec.Replicas) - if oldMSReplicas == 0 { - // cannot scale down this MachineSet. + // No op if this MS has been already scaled down to zero. + if replicas <= 0 { continue } - oldMSAvailableReplicas := ptr.Deref(oldMS.Status.AvailableReplicas, 0) - log.V(4).Info("Found available Machines in old MachineSet", - "count", oldMSAvailableReplicas, "target-machineset", client.ObjectKeyFromObject(oldMS).String()) - if oldMSReplicas == oldMSAvailableReplicas { - // no unhealthy replicas found, no scaling required. + // Compute the scale down extent by considering either all replicas or, if scaleDownOnlyUnavailableReplicas is set, unavailable replicas only. + // In both cases, scale down is limited to totalScaleDownCount. + // Exit if there is no room for scaling down the MS. + // NOTE: this is a preliminary check to verify if there is room for scaling down this specific MS; further down the code + // will make additional checks to ensure scale down actually happens without breaching MaxUnavailable, and + // if necessary, it will reduce the extent of the scale down accordingly. + maxScaleDown := replicas + if scaleDownOnlyUnavailableReplicas { + maxScaleDown = max(replicas-ptr.Deref(oldMS.Status.AvailableReplicas, 0), 0) + } + scaleDown := min(maxScaleDown, totalScaleDownCount) + if scaleDown == 0 { continue } - // TODO(in-place): fix this logic - // It looks like that the current logic fails when the MD controller is called twice in a row, without MS controller being triggered in the between, e.g. - // - first reconcile scales down ms1, 6-->5 (-1) - // - second reconcile is not taking into account scales down already in progress, unhealthy count is wrongly computed as -1 instead of 0, this leads to increasing replica count instead of keeping it as it is (or scaling down), and then the safeguard below errors out. - - remainingCleanupCount := maxCleanupCount - totalScaledDown - unhealthyCount := oldMSReplicas - oldMSAvailableReplicas - scaledDownCount := min(remainingCleanupCount, unhealthyCount) - newReplicasCount := oldMSReplicas - scaledDownCount - - if newReplicasCount > oldMSReplicas { - return 0, errors.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %v: %d -> %d", - client.ObjectKeyFromObject(oldMS), oldMSReplicas, newReplicasCount) + // Before scaling down validate if the operation will lead to a breach of minAvailability. + // In order to do so, consider how many existing replicas will be actually deleted, and consider + // this operation as impacting availability because there are no guarantees that the MS controller is going to + // delete unavailable replicas first; if the projected state breaches minAvailability, reduce the scale down extend accordingly. + + // If there are no available replicas on this MS, scale down won't impact totalAvailableReplicas at all. + if ptr.Deref(oldMS.Status.AvailableReplicas, 0) > 0 { + // If instead there are AvailableReplicas on this MS: + // compute the new spec.replicas assuming we are going to use the entire scale down extent. + newSpecReplicas := max(replicas-scaleDown, 0) + + // compute how many existing replicas the operation is going to delete: + // e.g. if MS is scaling down spec.replicas from 5 to 3, but status.replicas is 4, it is scaling down 1 existing replica. + // e.g. if MS is scaling down spec.replicas from 5 to 3, but status.replicas is 6, it is scaling down 3 existing replicas. + existingReplicasToBeDeleted := max(ptr.Deref(oldMS.Status.Replicas, 0)-newSpecReplicas, 0) + + // If we are deleting at least one existing replicas + if existingReplicasToBeDeleted > 0 { + // Check if we are scaling down more existing replica than what is allowed by MaxUnavailability. + if totalAvailableReplicas-minAvailable < existingReplicasToBeDeleted { + // If we are scaling down more existing replica than what is allowed by MaxUnavailability, then + // rollout planner must revisit the scale down extent. + + // Determine how many replicas can be deleted overall without further impacting availability. + maxExistingReplicasThatCanBeDeleted := max(totalAvailableReplicas-minAvailable, 0) + + // Compute the revisited new spec.replicas: + // e.g. MS spec.replicas 20, scale down 8, newSpecReplicas 12 (20-8), status.replicas 15 -> existingReplicasToBeDeleted 3 (15-12) + // assuming that maxExistingReplicasThatCanBeDeleted is 2, newSpecReplicasRevisited should be 15-2 = 13 + // e.g. MS spec.replicas 16, scale down 3, newSpecReplicas 13 (16-3), status.replicas 21 -> existingReplicasToBeDeleted 8 (21-13) + // assuming that maxExistingReplicasThatCanBeDeleted is 7, newSpecReplicasRevisited should be 21-7 = 14 + // NOTE: there is a safeguard preventing to go above the initial replicas number (this is scale down oldMS). + newSpecReplicasRevisited := min(ptr.Deref(oldMS.Status.Replicas, 0)-maxExistingReplicasThatCanBeDeleted, replicas) + + // Re-compute the scale down extent by using newSpecReplicasRevisited. + scaleDown = max(replicas-newSpecReplicasRevisited, 0) + + // Re-compute how many existing replicas the operation is going to delete by using newSpecReplicasRevisited. + existingReplicasToBeDeleted = max(ptr.Deref(oldMS.Status.Replicas, 0)-newSpecReplicasRevisited, 0) + } + + // keep track that we are deleting existing replicas by assuming that this operation + // will reduce totalAvailableReplicas (worst scenario, deletion of available machines happen first). + totalAvailableReplicas -= min(ptr.Deref(oldMS.Status.AvailableReplicas, 0), existingReplicasToBeDeleted) + } } - scaleDownCount := *(oldMS.Spec.Replicas) - newReplicasCount - log.V(5).Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d)", oldMS.Name, newReplicasCount, scaleDownCount), "MachineSet", klog.KObj(oldMS)) - p.scaleIntents[oldMS.Name] = newReplicasCount - - totalScaledDown += scaledDownCount + if scaleDown > 0 { + newScaleIntent := max(replicas-scaleDown, 0) + log.V(5).Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d)", oldMS.Name, newScaleIntent, scaleDown), "MachineSet", klog.KObj(oldMS)) + p.scaleIntents[oldMS.Name] = newScaleIntent + totalScaleDownCount = max(totalScaleDownCount-scaleDown, 0) + } } - return totalScaledDown, nil + return totalScaleDownCount, totalAvailableReplicas } -// scaleDownOldMachineSetsForRollingUpdate scales down old MachineSets when deployment strategy is "RollingUpdate". -// Need check maxUnavailable to ensure availability. -func (p *rolloutPlanner) scaleDownOldMachineSetsForRollingUpdate(ctx context.Context) (int32, error) { +// This funcs tries to detect and address the case when a rollout is not making progress because both scaling down and scaling up are blocked. +// Note: This func must be called after computing scale up/down intent for all the MachineSets. +// Note: This func only address deadlock due to unavailable machines not getting deleted on oldMSs, e.g. due to a wrong configuration. +// Note: Unblocking deadlocks when unavailable replicas exist only on oldMSs, is required also because replicas on oldMSs are not remediated by MHC. +func (p *rolloutPlanner) reconcileDeadlockBreaker(ctx context.Context) { log := ctrl.LoggerFrom(ctx) allMSs := append(p.oldMSs, p.newMS) - maxUnavailable := mdutil.MaxUnavailable(*p.md) - minAvailable := *(p.md.Spec.Replicas) - maxUnavailable - - // Find the number of available machines. - availableMachineCount := ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(allMSs), 0) - - // Check if we can scale down. - if availableMachineCount <= minAvailable { - // Cannot scale down. - return 0, nil + // if there are no replicas on the old MS, rollout is completed, no deadlock (actually no rollout in progress). + if ptr.Deref(mdutil.GetActualReplicaCountForMachineSets(p.oldMSs), 0) == 0 { + return } - log.V(4).Info("Found available machines in deployment, scaling down old MSes", "count", availableMachineCount) - - sort.Sort(mdutil.MachineSetsByCreationTimestamp(p.oldMSs)) - - // TODO(in-place): fix this logic - // It looks like that the current logic fails when the MD controller is called twice in a row e.g. reconcile of md 6 replicas MaxSurge=3, MaxUnavailable=1 and - // ms1, 6/5 replicas << one is scaling down, but scale down not yet processed by the MS controller. - // ms2, 3/3 replicas - // Leads to: - // ms1, 6/1 replicas << it further scaled down by 4, which leads to totAvailable machines is less than MinUnavailable, which should not happen - // ms2, 3/3 replicas + // if all the replicas on OldMS are available, no deadlock (regular scale up newMS and scale down oldMS should take over from here). + if ptr.Deref(mdutil.GetActualReplicaCountForMachineSets(p.oldMSs), 0) == ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(p.oldMSs), 0) { + return + } - totalScaledDown := int32(0) - totalScaleDownCount := availableMachineCount - minAvailable - for _, oldMS := range p.oldMSs { - if totalScaledDown >= totalScaleDownCount { - // No further scaling required. - break + // If there are scale operation in progress, no deadlock. + // Note: we are considering both scale operation from previous and current reconcile. + for _, ms := range allMSs { + if ptr.Deref(ms.Spec.Replicas, 0) != ptr.Deref(ms.Status.Replicas, 0) { + return } - - oldMSReplicas := ptr.Deref(oldMS.Spec.Replicas, 0) - if v, ok := p.scaleIntents[oldMS.Name]; ok { - oldMSReplicas = v + if _, ok := p.scaleIntents[ms.Name]; ok { + return } + } - if oldMSReplicas == 0 { - // cannot scale down this MachineSet. - continue - } + // if there are unavailable replicas on the newMS, wait for them to become available first. + // Note: A rollout cannot be unblocked if new machines do not become available. + // Note: If the replicas on the newMS are not becoming available either: + // - automatic remediation can help in addressing temporary failures. + // - user intervention is required to fix more permanent issues e.g. to fix a wrong configuration. + if ptr.Deref(p.newMS.Status.AvailableReplicas, 0) != ptr.Deref(p.newMS.Status.Replicas, 0) { + return + } - // Scale down. - scaleDownCount := min(oldMSReplicas, totalScaleDownCount-totalScaledDown) - newReplicasCount := oldMSReplicas - scaleDownCount - if newReplicasCount > oldMSReplicas { - return totalScaledDown, errors.Errorf("when scaling down old MachineSet, got invalid request to scale down %v: %d -> %d", - client.ObjectKeyFromObject(oldMS), oldMSReplicas, newReplicasCount) + // At this point we can assume there is a deadlock that can only be remediated by breaching maxUnavailability constraint + // and scaling down an oldMS with unavailable machines by one. + // + // Note: In most cases this is only a formal violation of maxUnavailability, because there is a good chance + // that the machine that will be deleted is one of the unavailable machines. + // Note: This for loop relies on the same ordering of oldMSs that has been applied by reconcileOldMachineSetsRolloutRolling. + for _, oldMS := range p.oldMSs { + if ptr.Deref(oldMS.Status.AvailableReplicas, 0) == ptr.Deref(oldMS.Status.Replicas, 0) || ptr.Deref(oldMS.Spec.Replicas, 0) == 0 { + continue } - log.V(5).Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d)", oldMS.Name, newReplicasCount, scaleDownCount), "MachineSet", klog.KObj(oldMS)) - p.scaleIntents[oldMS.Name] = newReplicasCount - - totalScaledDown += scaleDownCount + newScaleIntent := max(ptr.Deref(oldMS.Spec.Replicas, 0)-1, 0) + log.Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d) to unblock rollout stuck due to unavailable Machine on oldMS", oldMS.Name, newScaleIntent, 1), "MachineSet", klog.KObj(oldMS)) + p.scaleIntents[oldMS.Name] = newScaleIntent + return } - - return totalScaledDown, nil } // cleanupDisableMachineCreateAnnotation will remove the disable machine create annotation from new MachineSets that were created during reconcileOldMachineSetsOnDelete. diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go index 31e555e857fd..ab8bfff996ab 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go @@ -17,6 +17,7 @@ limitations under the License. package machinedeployment import ( + "context" "testing" . "github.com/onsi/gomega" @@ -24,6 +25,7 @@ import ( "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + "sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil" ) func TestReconcileNewMachineSet(t *testing.T) { @@ -267,150 +269,443 @@ func TestReconcileNewMachineSet(t *testing.T) { } } -func TestReconcileOldMachineSets(t *testing.T) { - testCases := []struct { - name string - machineDeployment *clusterv1.MachineDeployment - newMachineSet *clusterv1.MachineSet - oldMachineSets []*clusterv1.MachineSet - expectedOldMachineSetsReplicas int - error error +func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { + var ctx = context.Background() + + tests := []struct { + name string + md *clusterv1.MachineDeployment + scaleIntent map[string]int32 + newMS *clusterv1.MachineSet + oldMSs []*clusterv1.MachineSet + expectScaleIntent map[string]int32 + skipMaxUnavailabilityCheck bool }{ { - name: "RollingUpdate strategy: Scale down old MachineSets when all new replicas are available", - machineDeployment: &clusterv1.MachineDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "bar", - }, - Spec: clusterv1.MachineDeploymentSpec{ - Rollout: clusterv1.MachineDeploymentRolloutSpec{ - Strategy: clusterv1.MachineDeploymentRolloutStrategy{ - Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, - RollingUpdate: clusterv1.MachineDeploymentRolloutStrategyRollingUpdate{ - MaxUnavailable: intOrStrPtr(1), - MaxSurge: intOrStrPtr(3), - }, - }, - }, - Replicas: ptr.To[int32](2), - }, + name: "no op if there are no replicas on old machinesets", + scaleIntent: map[string]int32{}, + md: createMD("v2", 10, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v2", 10, withStatusReplicas(10), withStatusAvailableReplicas(10)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 0, withStatusReplicas(0), withStatusAvailableReplicas(0)), }, - newMachineSet: &clusterv1.MachineSet{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "bar", - }, - Spec: clusterv1.MachineSetSpec{ - Replicas: ptr.To[int32](0), - }, - Status: clusterv1.MachineSetStatus{ - AvailableReplicas: ptr.To[int32](2), - }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): }, - oldMachineSets: []*clusterv1.MachineSet{ - { - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "2replicas", - }, - Spec: clusterv1.MachineSetSpec{ - Replicas: ptr.To[int32](2), - }, - Status: clusterv1.MachineSetStatus{ - AvailableReplicas: ptr.To[int32](2), - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "1replicas", - }, - Spec: clusterv1.MachineSetSpec{ - Replicas: ptr.To[int32](1), - }, - Status: clusterv1.MachineSetStatus{ - AvailableReplicas: ptr.To[int32](1), - }, - }, + }, + { + name: "do not scale down if replicas is equal to minAvailable replicas", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 2 available replicas from ms1 + 1 available replica from ms2 = 3 available replicas == minAvailability, we cannot scale down }, - expectedOldMachineSetsReplicas: 0, }, { - name: "RollingUpdate strategy: It does not scale down old MachineSets when above maxUnavailable", - machineDeployment: &clusterv1.MachineDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "bar", - }, - Spec: clusterv1.MachineDeploymentSpec{ - Rollout: clusterv1.MachineDeploymentRolloutSpec{ - Strategy: clusterv1.MachineDeploymentRolloutStrategy{ - Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, - RollingUpdate: clusterv1.MachineDeploymentRolloutStrategyRollingUpdate{ - MaxUnavailable: intOrStrPtr(2), - MaxSurge: intOrStrPtr(3), - }, - }, - }, - Replicas: ptr.To[int32](10), - }, + name: "do not scale down if replicas is less then minAvailable replicas", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(1)), }, - newMachineSet: &clusterv1.MachineSet{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "bar", - }, - Spec: clusterv1.MachineSetSpec{ - Replicas: ptr.To[int32](5), - }, - Status: clusterv1.MachineSetStatus{ - Replicas: ptr.To[int32](5), - }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 1 available replicas from ms1 + 1 available replica from ms2 = 2 available replicas < minAvailability, we cannot scale down }, - oldMachineSets: []*clusterv1.MachineSet{ - { - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "8replicas", - }, - Spec: clusterv1.MachineSetSpec{ - Replicas: ptr.To[int32](8), - }, - Status: clusterv1.MachineSetStatus{ - Replicas: ptr.To[int32](10), - ReadyReplicas: ptr.To[int32](8), - AvailableReplicas: ptr.To[int32](8), - }, - }, + skipMaxUnavailabilityCheck: true, + }, + { + name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from a previous reconcile already takes the availability buffer", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(3), withStatusAvailableReplicas(3)), // OldMS is scaling down from a previous reconcile + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 3 available replicas from ms1 - 1 replica already scaling down from ms1 + 1 available replica from ms2 = 3 available replicas == minAvailability, we cannot further scale down + }, + }, + { + name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from a previous reconcile already takes the availability buffer, scale down from a previous reconcile on another MS", + scaleIntent: map[string]int32{}, + md: createMD("v2", 6, withRolloutStrategy(3, 1)), + newMS: createMS("ms3", "v2", 3, withStatusReplicas(0), withStatusAvailableReplicas(0)), // NewMS is scaling up from previous reconcile, but replicas do not exist yet + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(3), withStatusAvailableReplicas(3)), // OldMS is scaling down from a previous reconcile + createMS("ms2", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), // OldMS + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 3 available replicas from ms1 - 1 replica already scaling down from ms1 + 3 available replica from ms2 = 5 available replicas == minAvailability, we cannot further scale down + }, + }, + { + name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from current reconcile already takes the availability buffer (newMS is scaling down)", + scaleIntent: map[string]int32{ + "ms2": 1, // newMS (ms2) has a scaling down intent from current reconcile + }, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v2", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + }, + expectScaleIntent: map[string]int32{ + "ms2": 1, + // no new scale down intent for oldMSs (ms1): + // 2 available replicas from ms1 + 2 available replicas from ms2 - 1 replica already scaling down from ms2 = 3 available replicas == minAvailability, we cannot further scale down + }, + }, + { + name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from current reconcile already takes the availability buffer (oldMS is scaling down)", + scaleIntent: map[string]int32{ + "ms1": 1, // oldMS (ms1) has a scaling down intent from current reconcile + }, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v2", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs: + "ms1": 1, + // 2 available replicas from ms1 - 1 replica already scaling down from ms1 + 2 available replicas from ms2 = 3 available replicas == minAvailability, we cannot further scale down + }, + }, + { + name: "do not scale down replicas when there are more replicas than minAvailable replicas, but not all the replicas are available (unavailability on newMS)", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 3 available replicas from ms1 + 0 available replicas from ms2 = 3 available replicas == minAvailability, we cannot further scale down + }, + }, + { + name: "do not scale down replicas when there are more replicas than minAvailable replicas, but not all the replicas are available (unavailability on oldMS)", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(2)), // only 2 replicas are available + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 2 available replicas from ms1 + 1 available replicas from ms2 = 3 available replicas == minAvailability, we cannot further scale down + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, all replicas are available", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs (ms1): + "ms1": 2, // 3 available replicas from ms1 + 1 available replicas from ms2 = 4 available replicas > minAvailability, scale down to 2 replicas (-1) + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + createMS("ms2", "v1", 1, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available + createMS("ms3", "v2", 2, withStatusReplicas(2), withStatusAvailableReplicas(0)), // no replicas are available + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + // ms1 skipped in the first iteration because it does not have any unavailable replica + "ms2": 0, // 0 available replicas from ms2, it can be scaled down without impact on availability (-1) + "ms3": 0, // 0 available replicas from ms3, it can be scaled down without impact on availability (-2) + // no need to further scale down. + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, available replicas are scaled down when unavailable replicas are gone", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), + createMS("ms2", "v1", 1, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available + createMS("ms3", "v2", 2, withStatusReplicas(2), withStatusAvailableReplicas(0)), // no replicas are available + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + // ms1 skipped in the first iteration because it does not have any unavailable replica + "ms2": 0, // 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-1) + "ms3": 0, // 0 available replicas from ms3, it can be scaled down to 0 without any impact on availability (-2) + "ms1": 2, // 3 available replicas from ms1 + 1 available replica from ms4 = 4 available replicas > minAvailability, scale down to 2 replicas (-1) + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, available replicas are scaled down when unavailable replicas are gone is not affected by replicas without machines", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 4, withStatusReplicas(3), withStatusAvailableReplicas(3)), // 1 replica without machine + createMS("ms2", "v1", 2, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available, 1 replica without machine + createMS("ms3", "v2", 5, withStatusReplicas(2), withStatusAvailableReplicas(0)), // no replicas are available, 3 replicas without machines + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + // ms1 skipped in the first iteration because it does not have any unavailable replica + "ms2": 0, // 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-2) + "ms3": 0, // 0 available replicas from ms3, it can be scaled down to 0 without any impact on availability (-5) + "ms1": 2, // 3 available replicas from ms1 + 1 available replica from ms4 = 4 available replicas > minAvailability, scale down to 2 replicas, also drop the replica without machine (-2) + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, scale down stops before breaching minAvailable replicas", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + createMS("ms2", "v1", 1, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available + createMS("ms3", "v2", 2, withStatusReplicas(2), withStatusAvailableReplicas(1)), // only 1 replica is available + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + "ms2": 0, // 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-1) + // even if there is still room to scale down, we cannot scale down ms3: 1 available replica from ms1 + 1 available replica from ms4 = 2 available replicas < minAvailability + // does not make sense to continue scale down as there is no guarantee that MS3 would remove the unavailable replica + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, scale down stops before breaching minAvailable replicas is not affected by replicas without machines", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 2, withStatusReplicas(1), withStatusAvailableReplicas(1)), // 1 replica without machine + createMS("ms2", "v1", 3, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available, 2 replica without machines + createMS("ms3", "v2", 3, withStatusReplicas(2), withStatusAvailableReplicas(1)), // only 1 replica is available, 1 replica without machine + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + "ms1": 1, // 1 replica without machine, it can be scaled down to 1 without any impact on availability (-1) + "ms2": 0, // 2 replica without machine, 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-3) + "ms3": 2, // 1 replica without machine, it can be scaled down to 2 without any impact on availability (-1); even if there is still room to scale down, we cannot further scale down ms3: 1 available replica from ms1 + 1 available replica from ms4 = 2 available replicas < minAvailability + // does not make sense to continue scale down. + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, scale down keeps into account scale downs from a previous reconcile", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v3", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 3, withStatusReplicas(4), withStatusAvailableReplicas(4)), // OldMS is scaling down from a previous reconcile + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + "ms1": 2, // 4 available replicas from ms1 - 1 replica already scaling down from ms1 + 2 available replicas from ms2 = 5 available replicas > minAvailability, scale down to 2 (-1) + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, scale down keeps into account scale downs from the current reconcile (newMS is scaling down)", + scaleIntent: map[string]int32{ + "ms2": 1, // newMS (ms2) has a scaling down intent from current reconcile + }, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v3", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), + }, + expectScaleIntent: map[string]int32{ + "ms2": 1, + // new scale down intent for oldMSs: + "ms1": 2, // 3 available replicas from ms1 + 2 available replicas from ms2 - 1 replica already scaling down from ms2 = 4 available replicas > minAvailability, scale down to 2 (-1) + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, scale down keeps into account scale downs from the current reconcile (oldMS is scaling down)", + scaleIntent: map[string]int32{ + "ms1": 2, // oldMS (ms1) has a scaling down intent from current reconcile + }, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v3", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + "ms1": 1, // 3 available replicas from ms1 - 1 replica already scaling down from ms1 + 2 available replicas from ms2 = 4 available replicas > minAvailability, scale down to 1 (-1) }, - expectedOldMachineSetsReplicas: 8, }, } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) - planner := newRolloutPlanner() - planner.md = tc.machineDeployment - planner.newMS = tc.newMachineSet - planner.oldMSs = tc.oldMachineSets - - err := planner.reconcileOldMachineSets(ctx) - if tc.error != nil { - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(BeEquivalentTo(tc.error.Error())) - return + p := &rolloutPlanner{ + md: tt.md, + newMS: tt.newMS, + oldMSs: tt.oldMSs, + scaleIntents: tt.scaleIntent, } - + err := p.reconcileOldMachineSetsRolloutRolling(ctx) g.Expect(err).ToNot(HaveOccurred()) - for i := range tc.oldMachineSets { - scaleIntent := ptr.Deref(tc.oldMachineSets[i].Spec.Replicas, 0) - if v, ok := planner.scaleIntents[tc.oldMachineSets[i].Name]; ok { - scaleIntent = v + g.Expect(p.scaleIntents).To(Equal(tt.expectScaleIntent), "unexpected scaleIntents") + + // Check we are not breaching minAvailability by simulating what will happen by applying intent + worst scenario when a machine deletion is always an available machine deletion. + for _, oldMS := range tt.oldMSs { + scaleIntent, ok := p.scaleIntents[oldMS.Name] + if !ok { + continue } - g.Expect(scaleIntent).To(BeEquivalentTo(tc.expectedOldMachineSetsReplicas)) + machineScaleDown := max(ptr.Deref(oldMS.Status.Replicas, 0)-scaleIntent, 0) + if machineScaleDown > 0 { + oldMS.Status.AvailableReplicas = ptr.To(max(ptr.Deref(oldMS.Status.AvailableReplicas, 0)-machineScaleDown, 0)) + } + } + minAvailableReplicas := ptr.Deref(tt.md.Spec.Replicas, 0) - mdutil.MaxUnavailable(*tt.md) + totAvailableReplicas := ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(append(tt.oldMSs, tt.newMS)), 0) + if !tt.skipMaxUnavailabilityCheck { + g.Expect(totAvailableReplicas).To(BeNumerically(">=", minAvailableReplicas), "totAvailable machines is less than minAvailable") + } else { + t.Logf("skipping MaxUnavailability check (totAvailableReplicas: %d, minAvailableReplicas: %d)", totAvailableReplicas, minAvailableReplicas) + } + }) + } +} + +func Test_reconcileDeadlockBreaker(t *testing.T) { + var ctx = context.Background() + + tests := []struct { + name string + scaleIntent map[string]int32 + newMS *clusterv1.MachineSet + oldMSs []*clusterv1.MachineSet + expectScaleIntent map[string]int32 + skipMaxUnavailabilityCheck bool + }{ + { + name: "no op if there are no replicas on old machinesets", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 10, withStatusReplicas(10), withStatusAvailableReplicas(10)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 0, withStatusReplicas(0), withStatusAvailableReplicas(0)), // there no replicas, not a deadlock, we are actually at desired state + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + }, + }, + { + name: "no op if all the replicas on OldMS are available", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), // there no unavailable replicas, not a deadlock (rollout will continue as usual) + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + }, + }, + { + name: "no op if there are scale operation still in progress from a previous reconcile on newMS", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 6, withStatusReplicas(5), withStatusAvailableReplicas(5)), // scale up from a previous reconcile + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // there is at least one unavailable replica, not yet considered deadlock because scale operation still in progress + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + }, + }, + { + name: "no op if there are scale operation still in progress from a previous reconcile on oldMS", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 4, withStatusReplicas(5), withStatusAvailableReplicas(4)), // there is at least one unavailable replica, not yet considered deadlock because scale operation still in progress + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + }, + }, + { + name: "no op if there are scale operation from the current reconcile on newMS", + scaleIntent: map[string]int32{ + "ms2": 6, // scale up intent for newMS + }, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // there is at least one unavailable replica, not yet considered deadlock because scale operation still in progress + }, + expectScaleIntent: map[string]int32{ + "ms2": 6, + // no new scale down intent for oldMSs (ms1): + }, + }, + { + name: "no op if there are scale operation from the current reconcile on oldMS", + scaleIntent: map[string]int32{ + "ms1": 4, // scale up intent for oldMS + }, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // there is at least one unavailable replica, not yet considered deadlock because scale operation still in progress + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + "ms1": 4, + }, + }, + { + name: "wait for unavailable replicas on the newMS if any", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(3)), // one unavailable replica + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // there is at least one unavailable replica, no scale operations in progress, potential deadlock, but the system must wait until all replica on newMS are available before unblocking further deletion on oldMS. + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + }, + }, + { + name: "unblock a deadlock when necessary", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(3)), // there is at least one unavailable replica, all replicas on newMS available, no scale operations in progress, deadlock + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs (ms1): + "ms1": 4, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + p := &rolloutPlanner{ + newMS: tt.newMS, + oldMSs: tt.oldMSs, + scaleIntents: tt.scaleIntent, } + p.reconcileDeadlockBreaker(ctx) + g.Expect(p.scaleIntents).To(Equal(tt.expectScaleIntent), "unexpected scaleIntents") }) } } diff --git a/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go b/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go index 6c890235d647..bd42251386ec 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go @@ -28,6 +28,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" @@ -39,7 +40,7 @@ import ( "sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil" ) -type rolloutSequenceTestCase struct { +type rolloutRollingSequenceTestCase struct { name string maxSurge int32 maxUnavailable int32 @@ -107,11 +108,12 @@ type rolloutSequenceTestCase struct { seed int64 } -func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { +func Test_rolloutRollingSequences(t *testing.T) { ctx := context.Background() ctx = ctrl.LoggerInto(ctx, klog.Background()) + klog.SetOutput(ginkgo.GinkgoWriter) - tests := []rolloutSequenceTestCase{ + tests := []rolloutRollingSequenceTestCase{ // Regular rollout (no in-place) { // scale out by 1 @@ -161,7 +163,7 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { maxSurge: 3, maxUnavailable: 1, currentScope: &rolloutScope{ // Manually providing a scope simulating a MD originally with 6 replica in the middle of a rollout, with 3 machines already created in the newMS and 3 still on the oldMS, and then MD scaled up to 12. - machineDeployment: createMD("v2", 12, 3, 1), + machineDeployment: createMD("v2", 12, withRolloutStrategy(3, 1)), machineSets: []*clusterv1.MachineSet{ createMS("ms1", "v1", 3), createMS("ms2", "v2", 3), @@ -189,7 +191,7 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { maxSurge: 3, maxUnavailable: 1, currentScope: &rolloutScope{ // Manually providing a scope simulating a MD originally with 12 replica in the middle of a rollout, with 3 machines already created in the newMS and 9 still on the oldMS, and then MD scaled down to 6. - machineDeployment: createMD("v2", 6, 3, 1), + machineDeployment: createMD("v2", 6, withRolloutStrategy(3, 1)), machineSets: []*clusterv1.MachineSet{ createMS("ms1", "v1", 9), createMS("ms2", "v2", 3), @@ -223,7 +225,7 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { maxSurge: 3, maxUnavailable: 1, currentScope: &rolloutScope{ // Manually providing a scope simulating a MD with 6 replica in the middle of a rollout, with 3 machines already created in the newMS and 3 still on the oldMS, and then MD spec is changed. - machineDeployment: createMD("v3", 6, 3, 1), + machineDeployment: createMD("v3", 6, withRolloutStrategy(3, 1)), machineSets: []*clusterv1.MachineSet{ createMS("ms1", "v1", 3), createMS("ms2", "v2", 3), @@ -249,9 +251,8 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { } testWithPredictableReconcileOrder := true - // TODO(in-place): enable tests with random reconcile order as soon as the issues in reconcileOldMachineSets are fixed - testWithRandomReconcileOrderFromConstantSeed := false - testWithRandomReconcileOrderFromRandomSeed := false + testWithRandomReconcileOrderFromConstantSeed := true + testWithRandomReconcileOrderFromRandomSeed := true for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -260,11 +261,9 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { if testWithPredictableReconcileOrder { tt.maxIterations = 50 tt.randomControllerOrder = false - if tt.logAndGoldenFileName == "" { - tt.logAndGoldenFileName = strings.ToLower(tt.name) - } + tt.logAndGoldenFileName = strings.ToLower(tt.name) t.Run("default", func(t *testing.T) { - runTestCase(ctx, t, tt) + runRolloutRollingTestCase(ctx, t, tt) }) } @@ -273,13 +272,9 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { tt.name = fmt.Sprintf("%s, random(0)", name) tt.randomControllerOrder = true tt.seed = 0 - // TODO(in-place): drop the following line as soon as issue with scale down are fixed - tt.skipLogToFileAndGoldenFileCheck = true - if tt.logAndGoldenFileName == "" { - tt.logAndGoldenFileName = strings.ToLower(tt.name) - } + tt.logAndGoldenFileName = strings.ToLower(tt.name) t.Run("random(0)", func(t *testing.T) { - runTestCase(ctx, t, tt) + runRolloutRollingTestCase(ctx, t, tt) }) } @@ -291,7 +286,7 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { tt.randomControllerOrder = true tt.skipLogToFileAndGoldenFileCheck = true t.Run(fmt.Sprintf("random(%d)", tt.seed), func(t *testing.T) { - runTestCase(ctx, t, tt) + runRolloutRollingTestCase(ctx, t, tt) }) } } @@ -299,7 +294,7 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { } } -func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase) { +func runRolloutRollingTestCase(ctx context.Context, t *testing.T, tt rolloutRollingSequenceTestCase) { t.Helper() g := NewWithT(t) @@ -310,7 +305,7 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase) // Init current and desired state from test case current := tt.currentScope.Clone() if current == nil { - current = initCurrentRolloutScope(tt) + current = initCurrentRolloutScope(tt.currentMachineNames, withRolloutStrategy(tt.maxSurge, tt.maxUnavailable)) } desired := computeDesiredRolloutScope(current, tt.desiredMachineNames) @@ -324,12 +319,15 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase) i := 1 maxIterations := tt.maxIterations for { - taskOrder := defaultTaskOrder(current) + taskList := getTaskListRolloutRolling(current) + taskCount := len(taskList) + taskOrder := defaultTaskOrder(taskCount) if tt.randomControllerOrder { - taskOrder = randomTaskOrder(current, rng) + taskOrder = randomTaskOrder(taskCount, rng) } for _, taskID := range taskOrder { - if taskID == 0 { + task := taskList[taskID] + if task == "md" { fLogger.Logf("[MD controller] Iteration %d, Reconcile md", i) fLogger.Logf("[MD controller] - Input to rollout planner\n%s", current) @@ -339,8 +337,9 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase) p.newMS = current.newMS() p.oldMSs = current.oldMSs() - err := p.Plan(ctx) + err := p.planRolloutRolling(ctx) g.Expect(err).ToNot(HaveOccurred()) + // Apply changes. for _, ms := range current.machineSets { if scaleIntent, ok := p.scaleIntents[ms.Name]; ok { @@ -383,8 +382,8 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase) // Run mutators faking other controllers for _, ms := range current.machineSets { - if fmt.Sprintf("ms%d", taskID) == ms.Name { - fLogger.Logf("[MS controller] Iteration %d, Reconcile ms%d, %s", i, taskID, msLog(ms, current.machineSetMachines[ms.Name])) + if ms.Name == task { + fLogger.Logf("[MS controller] Iteration %d, Reconcile %s, %s", i, ms.Name, msLog(ms, current.machineSetMachines[ms.Name])) machineSetControllerMutator(fLogger, ms, current) break } @@ -486,15 +485,14 @@ type rolloutScope struct { machineUID int32 } -// Init creates current state and desired state for rolling out a md from currentMachines to wantMachineNames. -func initCurrentRolloutScope(tt rolloutSequenceTestCase) (current *rolloutScope) { +func initCurrentRolloutScope(currentMachineNames []string, mdOptions ...machineDeploymentOption) (current *rolloutScope) { // create current state, with a MD with // - given MaxSurge, MaxUnavailable // - replica counters assuming all the machines are at stable state // - spec different from the MachineSets and Machines we are going to create down below (to simulate a change that triggers a rollout, but it is not yet started) - mdReplicaCount := int32(len(tt.currentMachineNames)) + mdReplicaCount := int32(len(currentMachineNames)) current = &rolloutScope{ - machineDeployment: createMD("v2", mdReplicaCount, tt.maxSurge, tt.maxUnavailable), + machineDeployment: createMD("v2", mdReplicaCount, mdOptions...), } // Create current MS, with @@ -507,13 +505,12 @@ func initCurrentRolloutScope(tt rolloutSequenceTestCase) (current *rolloutScope) // - spec at stable state (rollout is not yet propagated to machines) var totMachines int32 currentMachines := []*clusterv1.Machine{} - for _, machineSetMachineName := range tt.currentMachineNames { + for _, machineSetMachineName := range currentMachineNames { totMachines++ currentMachines = append(currentMachines, createM(machineSetMachineName, ms.Name, ms.Spec.Template.Spec.FailureDomain)) } current.machineSetMachines = map[string][]*clusterv1.Machine{} current.machineSetMachines[ms.Name] = currentMachines - current.machineUID = totMachines // TODO(in-place): this should be removed as soon as rolloutPlanner will take care of creating newMS @@ -835,20 +832,30 @@ func maxSurgeToleration() func(log *fileLogger, _ int, _ *rolloutScope, _, _ int } } +func getTaskListRolloutRolling(current *rolloutScope) []string { + taskList := make([]string, 0) + taskList = append(taskList, "md") + for _, ms := range current.machineSets { + taskList = append(taskList, ms.Name) + } + taskList = append(taskList, fmt.Sprintf("ms%d", len(current.machineSets)+1)) // r the MachineSet that might be created when reconciling md + return taskList +} + // default task order ensure the controllers are run in a consistent and predictable way: md, ms1, ms2 and so on. -func defaultTaskOrder(current *rolloutScope) []int { +func defaultTaskOrder(taskCount int) []int { taskOrder := []int{} - for t := range len(current.machineSets) + 1 + 1 { // +1 is for the MachineSet that might be created when reconciling md, +1 is for the md itself + for t := range taskCount { taskOrder = append(taskOrder, t) } return taskOrder } -func randomTaskOrder(current *rolloutScope, rng *rand.Rand) []int { +func randomTaskOrder(taskCount int, rng *rand.Rand) []int { u := &UniqueRand{ rng: rng, generated: map[int]bool{}, - max: len(current.machineSets) + 1 + 1, // +1 is for the MachineSet that might be created when reconciling md, +1 is for the md itself + max: taskCount, } taskOrder := []int{} for !u.Done() { @@ -864,32 +871,47 @@ func randomTaskOrder(current *rolloutScope, rng *rand.Rand) []int { return taskOrder } -func createMD(failureDomain string, replicas int32, maxSurge, maxUnavailable int32) *clusterv1.MachineDeployment { - return &clusterv1.MachineDeployment{ +type machineDeploymentOption func(md *clusterv1.MachineDeployment) + +func withRolloutStrategy(maxSurge, maxUnavailable int32) func(md *clusterv1.MachineDeployment) { + return func(md *clusterv1.MachineDeployment) { + md.Spec.Rollout.Strategy = clusterv1.MachineDeploymentRolloutStrategy{ + Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, + RollingUpdate: clusterv1.MachineDeploymentRolloutStrategyRollingUpdate{ + MaxSurge: ptr.To(intstr.FromInt32(maxSurge)), + MaxUnavailable: ptr.To(intstr.FromInt32(maxUnavailable)), + }, + } + } +} + +func createMD(failureDomain string, replicas int32, options ...machineDeploymentOption) *clusterv1.MachineDeployment { + md := &clusterv1.MachineDeployment{ ObjectMeta: metav1.ObjectMeta{Name: "md"}, Spec: clusterv1.MachineDeploymentSpec{ // Note: using failureDomain as a template field to determine upToDate Template: clusterv1.MachineTemplateSpec{Spec: clusterv1.MachineSpec{FailureDomain: failureDomain}}, Replicas: &replicas, - Rollout: clusterv1.MachineDeploymentRolloutSpec{ - Strategy: clusterv1.MachineDeploymentRolloutStrategy{ - Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, - RollingUpdate: clusterv1.MachineDeploymentRolloutStrategyRollingUpdate{ - MaxSurge: ptr.To(intstr.FromInt32(maxSurge)), - MaxUnavailable: ptr.To(intstr.FromInt32(maxUnavailable)), - }, - }, - }, }, Status: clusterv1.MachineDeploymentStatus{ Replicas: &replicas, AvailableReplicas: &replicas, }, } + for _, opt := range options { + opt(md) + } + return md } -func createMS(name, failureDomain string, replicas int32) *clusterv1.MachineSet { - return &clusterv1.MachineSet{ +func withStatusAvailableReplicas(r int32) fakeMachineSetOption { + return func(ms *clusterv1.MachineSet) { + ms.Status.AvailableReplicas = ptr.To(r) + } +} + +func createMS(name, failureDomain string, replicas int32, opts ...fakeMachineSetOption) *clusterv1.MachineSet { + ms := &clusterv1.MachineSet{ ObjectMeta: metav1.ObjectMeta{ Name: name, }, @@ -903,6 +925,10 @@ func createMS(name, failureDomain string, replicas int32) *clusterv1.MachineSet AvailableReplicas: ptr.To(replicas), }, } + for _, opt := range opts { + opt(ms) + } + return ms } func createM(name, ownedByMS, failureDomain string) *clusterv1.Machine { diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6, random(0).test.log.golden new file mode 100644 index 000000000000..eaa1068c4f5e --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6, random(0).test.log.golden @@ -0,0 +1,59 @@ +## Regular rollout, 12 Replicas, maxSurge 3, maxUnavailable 1, scale down to 6, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 9/9 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12) + - ms2, 3/3 replicas (m13,m14,m15) +[Test] Rollout 12 replicas, MaxSurge=3, MaxUnavailable=1, random(0) +[MS controller] Iteration 1, Reconcile ms2, 3/3 replicas (m13,m14,m15) +[MS controller] Iteration 1, Reconcile ms1, 9/9 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 9/9 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12) + - ms2, 3/3 replicas (m13,m14,m15) +[MD controller] - Result of rollout planner + md, 12/6 replicas + - ms1, 9/2 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12) + - ms2, 3/3 replicas (m13,m14,m15) +[Toleration] tolerate maxSurge breach +[MS controller] Iteration 2, Reconcile ms1, 9/2 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12) +[MS controller] - ms1 scale down to 2/2 replicas (m4,m5,m6,m7,m8,m9,m10 deleted) +[MS controller] Iteration 2, Reconcile ms1, 2/2 replicas (m11,m12) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 12/6 replicas + - ms1, 2/2 replicas (m11,m12) + - ms2, 3/3 replicas (m13,m14,m15) +[MD controller] - Result of rollout planner + md, 5/6 replicas + - ms1, 2/2 replicas (m11,m12) + - ms2, 3/6 replicas (m13,m14,m15) +[MS controller] Iteration 3, Reconcile ms2, 3/6 replicas (m13,m14,m15) +[MS controller] - ms2 scale up to 6/6 replicas (m16,m17,m18 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 5/6 replicas + - ms1, 2/2 replicas (m11,m12) + - ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[MD controller] - Result of rollout planner + md, 8/6 replicas + - ms1, 2/0 replicas (m11,m12) + - ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[MS controller] Iteration 4, Reconcile ms1, 2/0 replicas (m11,m12) +[MS controller] - ms1 scale down to 0/0 replicas (m11,m12 deleted) +[MS controller] Iteration 4, Reconcile ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[MS controller] Iteration 5, Reconcile ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 8/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 0, maxunavailable 1, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 0, maxunavailable 1, random(0).test.log.golden new file mode 100644 index 000000000000..5a971c842f9d --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 0, maxunavailable 1, random(0).test.log.golden @@ -0,0 +1,105 @@ +## Regular rollout, 3 Replicas, maxSurge 0, maxUnavailable 1, random(0) + +[Test] Initial state + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/0 replicas () +[Test] Rollout 3 replicas, MaxSurge=0, MaxUnavailable=1, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 3/3 replicas (m1,m2,m3) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 3/2 replicas (m1,m2,m3) + - ms2, 0/0 replicas () +[MS controller] Iteration 2, Reconcile ms1, 3/2 replicas (m1,m2,m3) +[MS controller] - ms1 scale down to 2/2 replicas (m1 deleted) +[MS controller] Iteration 2, Reconcile ms1, 2/2 replicas (m2,m3) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 2/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 0/1 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/1 replicas () +[MS controller] - ms2 scale up to 1/1 replicas (m4 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 2/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 1/1 replicas (m4) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 2/1 replicas (m2,m3) + - ms2, 1/1 replicas (m4) +[MS controller] Iteration 4, Reconcile ms1, 2/1 replicas (m2,m3) +[MS controller] - ms1 scale down to 1/1 replicas (m2 deleted) +[MS controller] Iteration 4, Reconcile ms2, 1/1 replicas (m4) +[MS controller] Iteration 5, Reconcile ms2, 1/1 replicas (m4) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 1/1 replicas (m4) +[MD controller] - Result of rollout planner + md, 2/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 1/2 replicas (m4) +[MS controller] Iteration 6, Reconcile ms2, 1/2 replicas (m4) +[MS controller] - ms2 scale up to 2/2 replicas (m5 created) +[MD controller] Iteration 7, Reconcile md +[MD controller] - Input to rollout planner + md, 2/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 7, Reconcile ms2, 2/2 replicas (m4,m5) +[MD controller] Iteration 8, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 8, Reconcile ms1, 1/0 replicas (m3) +[MS controller] - ms1 scale down to 0/0 replicas (m3 deleted) +[MS controller] Iteration 9, Reconcile ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 9, Reconcile ms1, 0/0 replicas () +[MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 0/0 replicas () + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner + md, 2/3 replicas + - ms1, 0/0 replicas () + - ms2, 2/3 replicas (m4,m5) +[MS controller] Iteration 10, Reconcile ms2, 2/3 replicas (m4,m5) +[MS controller] - ms2 scale up to 3/3 replicas (m6 created) +[MS controller] Iteration 10, Reconcile ms1, 0/0 replicas () +[MS controller] Iteration 11, Reconcile ms2, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 11, Reconcile md +[MD controller] - Input to rollout planner + md, 2/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) +[Test] Final state + md, 3/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 1, maxunavailable 0, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 1, maxunavailable 0, random(0).test.log.golden new file mode 100644 index 000000000000..5045410bc79a --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 1, maxunavailable 0, random(0).test.log.golden @@ -0,0 +1,136 @@ +## Regular rollout, 3 Replicas, maxSurge 1, maxUnavailable 0, random(0) + +[Test] Initial state + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/0 replicas () +[Test] Rollout 3 replicas, MaxSurge=1, MaxUnavailable=0, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 3/3 replicas (m1,m2,m3) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/1 replicas () +[MS controller] Iteration 2, Reconcile ms1, 3/3 replicas (m1,m2,m3) +[MS controller] Iteration 2, Reconcile ms1, 3/3 replicas (m1,m2,m3) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/1 replicas () +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/1 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/1 replicas () +[MS controller] - ms2 scale up to 1/1 replicas (m4 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 1/1 replicas (m4) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 3/2 replicas (m1,m2,m3) + - ms2, 1/1 replicas (m4) +[MS controller] Iteration 4, Reconcile ms1, 3/2 replicas (m1,m2,m3) +[MS controller] - ms1 scale down to 2/2 replicas (m1 deleted) +[MS controller] Iteration 4, Reconcile ms2, 1/1 replicas (m4) +[MS controller] Iteration 5, Reconcile ms2, 1/1 replicas (m4) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 1/1 replicas (m4) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 1/2 replicas (m4) +[MS controller] Iteration 6, Reconcile ms2, 1/2 replicas (m4) +[MS controller] - ms2 scale up to 2/2 replicas (m5 created) +[MD controller] Iteration 7, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 2/1 replicas (m2,m3) + - ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 7, Reconcile ms2, 2/2 replicas (m4,m5) +[MD controller] Iteration 8, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 2/1 replicas (m2,m3) + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 2/1 replicas (m2,m3) + - ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 8, Reconcile ms1, 2/1 replicas (m2,m3) +[MS controller] - ms1 scale down to 1/1 replicas (m2 deleted) +[MS controller] Iteration 9, Reconcile ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 9, Reconcile ms1, 1/1 replicas (m3) +[MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 2/3 replicas (m4,m5) +[MS controller] Iteration 10, Reconcile ms2, 2/3 replicas (m4,m5) +[MS controller] - ms2 scale up to 3/3 replicas (m6 created) +[MS controller] Iteration 10, Reconcile ms1, 1/1 replicas (m3) +[MS controller] Iteration 11, Reconcile ms2, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 11, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 12, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 14, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 15, Reconcile ms2, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 16, Reconcile ms2, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 16, Reconcile ms2, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 16, Reconcile ms1, 1/0 replicas (m3) +[MS controller] - ms1 scale down to 0/0 replicas (m3 deleted) +[MD controller] Iteration 16, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) +[Test] Final state + md, 3/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 0, maxunavailable 10, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 0, maxunavailable 10, random(0).test.log.golden new file mode 100644 index 000000000000..87e5e2c9323a --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 0, maxunavailable 10, random(0).test.log.golden @@ -0,0 +1,47 @@ +## Regular rollout, 6 Replicas, maxSurge 0, maxUnavailable 10, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[Test] Rollout 6 replicas, MaxSurge=0, MaxUnavailable=10, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 6/0 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[MS controller] Iteration 2, Reconcile ms1, 6/0 replicas (m1,m2,m3,m4,m5,m6) +[MS controller] - ms1 scale down to 0/0 replicas (m1,m2,m3,m4,m5,m6 deleted) +[MS controller] Iteration 2, Reconcile ms1, 0/0 replicas () +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 0/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/6 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/6 replicas () +[MS controller] - ms2 scale up to 6/6 replicas (m7,m8,m9,m10,m11,m12 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 0/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 4, Reconcile ms1, 0/0 replicas () +[MS controller] Iteration 4, Reconcile ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 1, maxunavailable 3, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 1, maxunavailable 3, random(0).test.log.golden new file mode 100644 index 000000000000..e0ed895585e0 --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 1, maxunavailable 3, random(0).test.log.golden @@ -0,0 +1,70 @@ +## Regular rollout, 6 Replicas, maxSurge 1, maxUnavailable 3, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[Test] Rollout 6 replicas, MaxSurge=1, MaxUnavailable=3, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 6/3 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/1 replicas () +[MS controller] Iteration 2, Reconcile ms1, 6/3 replicas (m1,m2,m3,m4,m5,m6) +[MS controller] - ms1 scale down to 3/3 replicas (m1,m2,m3 deleted) +[MS controller] Iteration 2, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 0/1 replicas () +[MD controller] - Result of rollout planner + md, 3/6 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 0/4 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/4 replicas () +[MS controller] - ms2 scale up to 4/4 replicas (m7,m8,m9,m10 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 3/6 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] - Result of rollout planner + md, 7/6 replicas + - ms1, 3/0 replicas (m4,m5,m6) + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MS controller] Iteration 4, Reconcile ms1, 3/0 replicas (m4,m5,m6) +[MS controller] - ms1 scale down to 0/0 replicas (m4,m5,m6 deleted) +[MS controller] Iteration 4, Reconcile ms2, 4/4 replicas (m7,m8,m9,m10) +[MS controller] Iteration 5, Reconcile ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas () + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] - Result of rollout planner + md, 4/6 replicas + - ms1, 0/0 replicas () + - ms2, 4/6 replicas (m7,m8,m9,m10) +[MS controller] Iteration 6, Reconcile ms2, 4/6 replicas (m7,m8,m9,m10) +[MS controller] - ms2 scale up to 6/6 replicas (m11,m12 created) +[MD controller] Iteration 7, Reconcile md +[MD controller] - Input to rollout planner + md, 4/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 7, Reconcile ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 10, maxunavailable 0, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 10, maxunavailable 0, random(0).test.log.golden new file mode 100644 index 000000000000..067f8cb229e8 --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 10, maxunavailable 0, random(0).test.log.golden @@ -0,0 +1,57 @@ +## Regular rollout, 6 Replicas, maxSurge 10, maxUnavailable 0, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[Test] Rollout 6 replicas, MaxSurge=10, MaxUnavailable=0, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/6 replicas () +[MS controller] Iteration 2, Reconcile ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) +[MS controller] Iteration 2, Reconcile ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/6 replicas () +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/6 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/6 replicas () +[MS controller] - ms2 scale up to 6/6 replicas (m7,m8,m9,m10,m11,m12 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 12/6 replicas + - ms1, 6/0 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 4, Reconcile ms1, 6/0 replicas (m1,m2,m3,m4,m5,m6) +[MS controller] - ms1 scale down to 0/0 replicas (m1,m2,m3,m4,m5,m6 deleted) +[MS controller] Iteration 4, Reconcile ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 5, Reconcile ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 12/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, change spec, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, change spec, random(0).test.log.golden new file mode 100644 index 000000000000..d2aad77f1860 --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, change spec, random(0).test.log.golden @@ -0,0 +1,123 @@ +## Regular rollout, 6 Replicas, maxSurge 3, maxUnavailable 1, change spec, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 0/0 replicas () +[Test] Rollout 6 replicas, MaxSurge=3, MaxUnavailable=1, random(0) +[MS controller] Iteration 1, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 1, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 1, Reconcile ms3, 0/0 replicas () +[MS controller] Iteration 2, Reconcile ms3, 0/0 replicas () +[MS controller] Iteration 3, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 3, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 3, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 3, Reconcile ms3, 0/0 replicas () +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 3/2 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 0/3 replicas () +[MS controller] Iteration 3, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 3, Reconcile ms3, 0/3 replicas () +[MS controller] - ms3 scale up to 3/3 replicas (m10,m11,m12 created) +[MS controller] Iteration 3, Reconcile ms3, 3/3 replicas (m10,m11,m12) +[MS controller] Iteration 4, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 4, Reconcile ms3, 3/3 replicas (m10,m11,m12) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 3/2 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 3/3 replicas (m10,m11,m12) +[MD controller] - Result of rollout planner + md, 9/6 replicas + - ms1, 3/0 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 3/3 replicas (m10,m11,m12) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 9/6 replicas + - ms1, 3/0 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 3/3 replicas (m10,m11,m12) +[MD controller] - Result of rollout planner + md, 9/6 replicas + - ms1, 3/0 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 3/3 replicas (m10,m11,m12) +[MS controller] Iteration 5, Reconcile ms3, 3/3 replicas (m10,m11,m12) +[MS controller] Iteration 5, Reconcile ms1, 3/0 replicas (m4,m5,m6) +[MS controller] - ms1 scale down to 0/0 replicas (m4,m5,m6 deleted) +[MS controller] Iteration 5, Reconcile ms3, 3/3 replicas (m10,m11,m12) +[MD controller] Iteration 6, Reconcile md +[MD controller] - Input to rollout planner + md, 9/6 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 3/3 replicas (m10,m11,m12) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 3/2 replicas (m7,m8,m9) + - ms3, 3/6 replicas (m10,m11,m12) +[MS controller] Iteration 6, Reconcile ms3, 3/6 replicas (m10,m11,m12) +[MS controller] - ms3 scale up to 6/6 replicas (m13,m14,m15 created) +[MS controller] Iteration 6, Reconcile ms2, 3/2 replicas (m7,m8,m9) +[MS controller] - ms2 scale down to 2/2 replicas (m7 deleted) +[MS controller] Iteration 6, Reconcile ms1, 0/0 replicas () +[MS controller] Iteration 7, Reconcile ms2, 2/2 replicas (m8,m9) +[MS controller] Iteration 7, Reconcile ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MS controller] Iteration 8, Reconcile ms1, 0/0 replicas () +[MD controller] Iteration 8, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 2/2 replicas (m8,m9) + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MD controller] - Result of rollout planner + md, 8/6 replicas + - ms1, 0/0 replicas () + - ms2, 2/0 replicas (m8,m9) + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MS controller] Iteration 8, Reconcile ms1, 0/0 replicas () +[MS controller] Iteration 8, Reconcile ms2, 2/0 replicas (m8,m9) +[MS controller] - ms2 scale down to 0/0 replicas (m8,m9 deleted) +[MS controller] Iteration 9, Reconcile ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MS controller] Iteration 9, Reconcile ms2, 0/0 replicas () +[MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 8/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MS controller] Iteration 9, Reconcile ms1, 0/0 replicas () +[MS controller] Iteration 9, Reconcile ms1, 0/0 replicas () +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, random(0).test.log.golden new file mode 100644 index 000000000000..d2560cd9f190 --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, random(0).test.log.golden @@ -0,0 +1,92 @@ +## Regular rollout, 6 Replicas, maxSurge 3, maxUnavailable 1, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[Test] Rollout 6 replicas, MaxSurge=3, MaxUnavailable=1, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 6/5 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/3 replicas () +[MS controller] Iteration 2, Reconcile ms1, 6/5 replicas (m1,m2,m3,m4,m5,m6) +[MS controller] - ms1 scale down to 5/5 replicas (m1 deleted) +[MS controller] Iteration 2, Reconcile ms1, 5/5 replicas (m2,m3,m4,m5,m6) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 5/5 replicas (m2,m3,m4,m5,m6) + - ms2, 0/3 replicas () +[MD controller] - Result of rollout planner + md, 5/6 replicas + - ms1, 5/5 replicas (m2,m3,m4,m5,m6) + - ms2, 0/4 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/4 replicas () +[MS controller] - ms2 scale up to 4/4 replicas (m7,m8,m9,m10 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 5/6 replicas + - ms1, 5/5 replicas (m2,m3,m4,m5,m6) + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] - Result of rollout planner + md, 9/6 replicas + - ms1, 5/1 replicas (m2,m3,m4,m5,m6) + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MS controller] Iteration 4, Reconcile ms1, 5/1 replicas (m2,m3,m4,m5,m6) +[MS controller] - ms1 scale down to 1/1 replicas (m2,m3,m4,m5 deleted) +[MS controller] Iteration 4, Reconcile ms2, 4/4 replicas (m7,m8,m9,m10) +[MS controller] Iteration 5, Reconcile ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 9/6 replicas + - ms1, 1/1 replicas (m6) + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] - Result of rollout planner + md, 5/6 replicas + - ms1, 1/1 replicas (m6) + - ms2, 4/6 replicas (m7,m8,m9,m10) +[MS controller] Iteration 6, Reconcile ms2, 4/6 replicas (m7,m8,m9,m10) +[MS controller] - ms2 scale up to 6/6 replicas (m11,m12 created) +[MD controller] Iteration 7, Reconcile md +[MD controller] - Input to rollout planner + md, 5/6 replicas + - ms1, 1/1 replicas (m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 7/6 replicas + - ms1, 1/0 replicas (m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 7, Reconcile ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] Iteration 8, Reconcile md +[MD controller] - Input to rollout planner + md, 7/6 replicas + - ms1, 1/0 replicas (m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 7/6 replicas + - ms1, 1/0 replicas (m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 8, Reconcile ms1, 1/0 replicas (m6) +[MS controller] - ms1 scale down to 0/0 replicas (m6 deleted) +[MS controller] Iteration 9, Reconcile ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 9, Reconcile ms1, 0/0 replicas () +[MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, scale up to 12, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, scale up to 12, random(0).test.log.golden new file mode 100644 index 000000000000..f2d37c25a933 --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, scale up to 12, random(0).test.log.golden @@ -0,0 +1,59 @@ +## Regular rollout, 6 Replicas, maxSurge 3, maxUnavailable 1, scale up to 12, random(0) + +[Test] Initial state + md, 12/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) +[Test] Rollout 6 replicas, MaxSurge=3, MaxUnavailable=1, random(0) +[MS controller] Iteration 1, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 1, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 12/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) +[MD controller] - Result of rollout planner + md, 6/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/12 replicas (m7,m8,m9) +[Toleration] tolerate maxUnavailable breach +[MS controller] Iteration 2, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 2, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/12 replicas (m7,m8,m9) +[MD controller] - Result of rollout planner + md, 6/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/12 replicas (m7,m8,m9) +[Toleration] tolerate maxUnavailable breach +[MS controller] Iteration 3, Reconcile ms2, 3/12 replicas (m7,m8,m9) +[MS controller] - ms2 scale up to 12/12 replicas (m10,m11,m12,m13,m14,m15,m16,m17,m18 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 6/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[MD controller] - Result of rollout planner + md, 15/12 replicas + - ms1, 3/0 replicas (m4,m5,m6) + - ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[MS controller] Iteration 4, Reconcile ms1, 3/0 replicas (m4,m5,m6) +[MS controller] - ms1 scale down to 0/0 replicas (m4,m5,m6 deleted) +[MS controller] Iteration 4, Reconcile ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[MS controller] Iteration 5, Reconcile ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 15/12 replicas + - ms1, 0/0 replicas () + - ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[MD controller] - Result of rollout planner + md, 12/12 replicas + - ms1, 0/0 replicas () + - ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[Test] Final state + md, 12/12 replicas + - ms1, 0/0 replicas () + - ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18)