From d16533a05da3fce1f6fb7236dd30a06b2c7883fb Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Wed, 1 Oct 2025 12:59:40 +0200 Subject: [PATCH 1/5] Fix ScaleDownOldMS --- .../machinedeployment_rolling.go | 286 +++++----- .../machinedeployment_rolling_test.go | 516 +++++++++++++----- ...machinedeployment_rollout_sequence_test.go | 35 +- ...scale down to 6, random(0).test.log.golden | 59 ++ ...axunavailable 1, random(0).test.log.golden | 105 ++++ ...axunavailable 0, random(0).test.log.golden | 136 +++++ ...xunavailable 10, random(0).test.log.golden | 47 ++ ...axunavailable 3, random(0).test.log.golden | 70 +++ ...axunavailable 0, random(0).test.log.golden | 57 ++ ... 1, change spec, random(0).test.log.golden | 123 +++++ ...axunavailable 1, random(0).test.log.golden | 92 ++++ ... scale up to 12, random(0).test.log.golden | 59 ++ 12 files changed, 1292 insertions(+), 293 deletions(-) create mode 100644 internal/controllers/machinedeployment/testdata/regular rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6, random(0).test.log.golden create mode 100644 internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 0, maxunavailable 1, random(0).test.log.golden create mode 100644 internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 1, maxunavailable 0, random(0).test.log.golden create mode 100644 internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 0, maxunavailable 10, random(0).test.log.golden create mode 100644 internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 1, maxunavailable 3, random(0).test.log.golden create mode 100644 internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 10, maxunavailable 0, random(0).test.log.golden create mode 100644 internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, change spec, random(0).test.log.golden create mode 100644 internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, random(0).test.log.golden create mode 100644 internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, scale up to 12, random(0).test.log.golden diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling.go b/internal/controllers/machinedeployment/machinedeployment_rolling.go index 83bd72f70d6e..85f10cbc3b00 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling.go @@ -122,7 +122,16 @@ func (p *rolloutPlanner) Plan(ctx context.Context) error { } // Scale down, if we can. - return p.reconcileOldMachineSets(ctx) + if err := p.reconcileOldMachineSets(ctx); err != nil { + return err + } + + // This funcs tries to detect and address the case when a rollout is not making progress because both scaling down and scaling up are blocked. + // Note. this func must be called after computing scale up/down intent for all the MachineSets. + // Note. this func only address deadlock due to unavailable machines not getting deleted on oldMSs, e.g. due to a wrong configuration. + // unblocking deadlock when unavailable machines exists only on oldMSs, is required also because failures on old machines set are not remediated by MHC. 
+ p.reconcileDeadlockBreaker(ctx) + return nil } func (p *rolloutPlanner) reconcileNewMachineSet(ctx context.Context) error { @@ -160,204 +169,177 @@ func (p *rolloutPlanner) reconcileNewMachineSet(ctx context.Context) error { } func (p *rolloutPlanner) reconcileOldMachineSets(ctx context.Context) error { - log := ctrl.LoggerFrom(ctx) + allMSs := append(p.oldMSs, p.newMS) - oldMachinesCount := mdutil.GetReplicaCountForMachineSets(p.oldMSs) - if oldMachinesCount == 0 { - // Can't scale down further + // no op if there are no replicas on old machinesets + if mdutil.GetReplicaCountForMachineSets(p.oldMSs) == 0 { return nil } - newMSReplicas := ptr.Deref(p.newMS.Spec.Replicas, 0) - if v, ok := p.scaleIntents[p.newMS.Name]; ok { - newMSReplicas = v - } - allMachinesCount := oldMachinesCount + newMSReplicas - log.V(4).Info("New MachineSet has available machines", - "machineset", client.ObjectKeyFromObject(p.newMS).String(), "available-replicas", ptr.Deref(p.newMS.Status.AvailableReplicas, 0)) maxUnavailable := mdutil.MaxUnavailable(*p.md) + minAvailable := max(ptr.Deref(p.md.Spec.Replicas, 0)-maxUnavailable, 0) - // Check if we can scale down. We can scale down in the following 2 cases: - // * Some old MachineSets have unhealthy replicas, we could safely scale down those unhealthy replicas since that won't further - // increase unavailability. - // * New MachineSet has scaled up and it's replicas becomes ready, then we can scale down old MachineSets in a further step. - // - // maxScaledDown := allMachinesCount - minAvailable - newMachineSetMachinesUnavailable - // take into account not only maxUnavailable and any surge machines that have been created, but also unavailable machines from - // the newMS, so that the unavailable machines from the newMS would not make us scale down old MachineSets in a further - // step(that will increase unavailability). - // - // Concrete example: - // - // * 10 replicas - // * 2 maxUnavailable (absolute number, not percent) - // * 3 maxSurge (absolute number, not percent) - // - // case 1: - // * Deployment is updated, newMS is created with 3 replicas, oldMS is scaled down to 8, and newMS is scaled up to 5. - // * The new MachineSet machines crashloop and never become available. - // * allMachinesCount is 13. minAvailable is 8. newMSMachinesUnavailable is 5. - // * A node fails and causes one of the oldMS machines to become unavailable. However, 13 - 8 - 5 = 0, so the oldMS won't be scaled down. - // * The user notices the crashloop and does kubectl rollout undo to rollback. - // * newMSMachinesUnavailable is 1, since we rolled back to the good MachineSet, so maxScaledDown = 13 - 8 - 1 = 4. 4 of the crashlooping machines will be scaled down. - // * The total number of machines will then be 9 and the newMS can be scaled up to 10. - // - // case 2: - // Same example, but pushing a new machine template instead of rolling back (aka "roll over"): - // * The new MachineSet created must start with 0 replicas because allMachinesCount is already at 13. - // * However, newMSMachinesUnavailable would also be 0, so the 2 old MachineSets could be scaled down by 5 (13 - 8 - 0), which would then - // allow the new MachineSet to be scaled up by 5. 
-	availableReplicas := ptr.Deref(p.newMS.Status.AvailableReplicas, 0)
-
-	minAvailable := *(p.md.Spec.Replicas) - maxUnavailable
-	newMSUnavailableMachineCount := newMSReplicas - availableReplicas
-	maxScaledDown := allMachinesCount - minAvailable - newMSUnavailableMachineCount
-	if maxScaledDown <= 0 {
-		return nil
+	totReplicas := mdutil.GetReplicaCountForMachineSets(allMSs)
+
+	// Find the number of available machines.
+	totAvailableReplicas := ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(allMSs), 0)
+
+	// Find the number of pending scale downs from previous reconciles/from the current reconcile;
+	// This is required because whenever we are reducing the number of replicas, this operation could further impact availability, e.g.
+	// - in case of regular rollout, there is no certainty about which machine is going to be deleted (and if this machine is currently available or not):
+	//   - e.g. the MS controller is going to delete machines with the deletion annotation first; also the MS controller has a slightly different notion of unavailable as of now.
+	// - in case of in-place rollout, in-place upgrades are always assumed to impact availability (they can always fail).
+	totPendingScaleDown := int32(0)
+	for _, ms := range allMSs {
+		scaleIntent := ptr.Deref(ms.Spec.Replicas, 0)
+		if v, ok := p.scaleIntents[ms.Name]; ok {
+			scaleIntent = min(scaleIntent, v)
+		}
+
+		// NOTE: we are counting only pending scale downs from the current status.replicas (so scale down of actual machines).
+		if scaleIntent < ptr.Deref(ms.Status.Replicas, 0) {
+			totPendingScaleDown += max(ptr.Deref(ms.Status.Replicas, 0)-scaleIntent, 0)
+		}
 	}
 
-	// Clean up unhealthy replicas first, otherwise unhealthy replicas will block deployment
-	// and cause timeout. See https://github.com/kubernetes/kubernetes/issues/16737
-	cleanupCount, err := p.cleanupUnhealthyReplicas(ctx, maxScaledDown)
-	if err != nil {
-		return err
+	// Compute the total number of replicas that can be scaled down.
+	// Exit immediately if there is no room for scaling down.
+	totalScaleDownCount := max(totReplicas-totPendingScaleDown-minAvailable, 0)
+	if totalScaleDownCount <= 0 {
+		return nil
 	}
 
-	log.V(4).Info("Cleaned up unhealthy replicas from old MachineSets", "count", cleanupCount)
+	sort.Sort(mdutil.MachineSetsByCreationTimestamp(p.oldMSs))
 
-	// Scale down old MachineSets, need check maxUnavailable to ensure we can scale down
-	scaledDownCount, err := p.scaleDownOldMachineSetsForRollingUpdate(ctx)
-	if err != nil {
-		return err
-	}
+	// Scale down only unavailable replicas / up to the residual totalScaleDownCount.
+	// NOTE: we are scaling down unavailable machines first in order to increase chances for the rollout to progress;
+	// however, the MS controller might have a different opinion on which machines to scale down.
+	// As a consequence, the scale down operation must continuously assess if reducing the number of replicas
+	// for an older MS could further impact availability, under the assumption that any scale down could further impact availability (same as above).
+	// If reducing the number of replicas might lead to breaching minAvailable, the scale down extent must be limited accordingly.
+	totalScaleDownCount, totAvailableReplicas = p.scaleDownOldMSs(ctx, totalScaleDownCount, totAvailableReplicas, minAvailable, false)
+
+	// Then scale down old MSs down to zero replicas / up to the residual totalScaleDownCount.
+	// NOTE: also in this case, continuously assess if reducing the number of replicas could further impact availability,
+	// and if necessary, limit the scale down extent to ensure the operation respects the minAvailable limit.
+	_, _ = p.scaleDownOldMSs(ctx, totalScaleDownCount, totAvailableReplicas, minAvailable, true)
 
-	log.V(4).Info("Scaled down old MachineSets of MachineDeployment", "count", scaledDownCount)
 	return nil
 }
 
-// cleanupUnhealthyReplicas will scale down old MachineSets with unhealthy replicas, so that all unhealthy replicas will be deleted.
-func (p *rolloutPlanner) cleanupUnhealthyReplicas(ctx context.Context, maxCleanupCount int32) (int32, error) {
+func (p *rolloutPlanner) scaleDownOldMSs(ctx context.Context, totalScaleDownCount, totAvailableReplicas, minAvailable int32, scaleToZero bool) (int32, int32) {
 	log := ctrl.LoggerFrom(ctx)
 
-	sort.Sort(mdutil.MachineSetsByCreationTimestamp(p.oldMSs))
-
-	// Scale down all old MachineSets with any unhealthy replicas. MachineSet will honour spec.deletion.order
-	// for deleting Machines. Machines with a deletion timestamp, with a failure message or without a nodeRef
-	// are preferred for all strategies.
-	// This results in a best effort to remove machines backing unhealthy nodes.
-	totalScaledDown := int32(0)
-
 	for _, oldMS := range p.oldMSs {
-		if oldMS.Spec.Replicas == nil {
-			return 0, errors.Errorf("spec.replicas for MachineSet %v is nil, this is unexpected", client.ObjectKeyFromObject(oldMS))
-		}
-
-		if totalScaledDown >= maxCleanupCount {
+		// No op if there is no scale down budget left.
+		if totalScaleDownCount <= 0 {
 			break
 		}
 
-		oldMSReplicas := *(oldMS.Spec.Replicas)
-		if oldMSReplicas == 0 {
-			// cannot scale down this MachineSet.
-			continue
+		// No op if this MS has already been scaled down to zero.
+		scaleIntent := ptr.Deref(oldMS.Spec.Replicas, 0)
+		if v, ok := p.scaleIntents[oldMS.Name]; ok {
+			scaleIntent = min(scaleIntent, v)
 		}
 
-		oldMSAvailableReplicas := ptr.Deref(oldMS.Status.AvailableReplicas, 0)
-		log.V(4).Info("Found available Machines in old MachineSet",
-			"count", oldMSAvailableReplicas, "target-machineset", client.ObjectKeyFromObject(oldMS).String())
-		if oldMSReplicas == oldMSAvailableReplicas {
-			// no unhealthy replicas found, no scaling required.
+		if scaleIntent <= 0 {
 			continue
 		}
 
-		// TODO(in-place): fix this logic
-		// It looks like that the current logic fails when the MD controller is called twice in a row, without MS controller being triggered in the between, e.g.
-		// - first reconcile scales down ms1, 6-->5 (-1)
-		// - second reconcile is not taking into account scales down already in progress, unhealthy count is wrongly computed as -1 instead of 0, this leads to increasing replica count instead of keeping it as it is (or scaling down), and then the safeguard below errors out.
-
-		remainingCleanupCount := maxCleanupCount - totalScaledDown
-		unhealthyCount := oldMSReplicas - oldMSAvailableReplicas
-		scaledDownCount := min(remainingCleanupCount, unhealthyCount)
-		newReplicasCount := oldMSReplicas - scaledDownCount
+		// Compute the scale down extent by considering either unavailable replicas or, if scaleToZero is set, all replicas.
+		// In both cases, scale down is limited to totalScaleDownCount.
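+		// For example (illustrative only): an oldMS with a scale intent of 3 replicas of which only 1 is available
+		// can give up at most 2 replicas in the first pass (scaleToZero=false) and up to all 3 in the second pass (scaleToZero=true),
+		// in both cases further capped by the residual totalScaleDownCount and by the minAvailability check below.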
+		maxScaleDown := max(scaleIntent-ptr.Deref(oldMS.Status.AvailableReplicas, 0), 0)
+		if scaleToZero {
+			maxScaleDown = scaleIntent
+		}
+		scaleDown := min(maxScaleDown, totalScaleDownCount)
 
-		if newReplicasCount > oldMSReplicas {
-			return 0, errors.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %v: %d -> %d",
-				client.ObjectKeyFromObject(oldMS), oldMSReplicas, newReplicasCount)
+		// Exit if there is no room for scaling down the MS.
+		if scaleDown == 0 {
+			continue
 		}
 
-		scaleDownCount := *(oldMS.Spec.Replicas) - newReplicasCount
-		log.V(5).Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d)", oldMS.Name, newReplicasCount, scaleDownCount), "MachineSet", klog.KObj(oldMS))
-		p.scaleIntents[oldMS.Name] = newReplicasCount
+		// Before scaling down, validate if the operation will lead to a breach of minAvailability.
+		// In order to do so, consider how many machines will actually be deleted, and treat this operation as impacting availability;
+		// if the projected state breaches minAvailability, reduce the scale down extent accordingly.
+		availableMachineScaleDown := int32(0)
+		if ptr.Deref(oldMS.Status.AvailableReplicas, 0) > 0 {
+			newScaleIntent := scaleIntent - scaleDown
+			machineScaleDownIntent := max(ptr.Deref(oldMS.Status.Replicas, 0)-newScaleIntent, 0)
+			if totAvailableReplicas-machineScaleDownIntent < minAvailable {
+				availableMachineScaleDown = max(totAvailableReplicas-minAvailable, 0)
+				scaleDown = scaleDown - machineScaleDownIntent + availableMachineScaleDown
+
+				totAvailableReplicas = max(totAvailableReplicas-availableMachineScaleDown, 0)
+			}
+		}
 
-		totalScaledDown += scaledDownCount
+		if scaleDown > 0 {
+			newScaleIntent := max(scaleIntent-scaleDown, 0)
+			log.V(5).Info(fmt.Sprintf("Setting scale down intent for %s to %d replicas (-%d)", oldMS.Name, newScaleIntent, scaleDown), "machineset", client.ObjectKeyFromObject(oldMS).String())
+			p.scaleIntents[oldMS.Name] = newScaleIntent
+			totalScaleDownCount = max(totalScaleDownCount-scaleDown, 0)
+		}
 	}
 
-	return totalScaledDown, nil
+	return totalScaleDownCount, totAvailableReplicas
 }
 
-// scaleDownOldMachineSetsForRollingUpdate scales down old MachineSets when deployment strategy is "RollingUpdate".
-// Need check maxUnavailable to ensure availability.
-func (p *rolloutPlanner) scaleDownOldMachineSetsForRollingUpdate(ctx context.Context) (int32, error) {
+// This func tries to detect and address the case when a rollout is not making progress because both scaling down and scaling up are blocked.
+// Note: this func must be called after computing the scale up/down intents for all the MachineSets.
+// Note: this func only addresses deadlocks due to unavailable machines not getting deleted on oldMSs, e.g. due to a wrong configuration.
+// Unblocking deadlocks when unavailable machines exist only on oldMSs is also required because failures on old MachineSets are not remediated by MHC.
+func (p *rolloutPlanner) reconcileDeadlockBreaker(ctx context.Context) {
 	log := ctrl.LoggerFrom(ctx)
 
 	allMSs := append(p.oldMSs, p.newMS)
-	maxUnavailable := mdutil.MaxUnavailable(*p.md)
-	minAvailable := *(p.md.Spec.Replicas) - maxUnavailable
-
-	// Find the number of available machines.
-	availableMachineCount := ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(allMSs), 0)
-
-	// Check if we can scale down.
-	if availableMachineCount <= minAvailable {
-		// Cannot scale down.
-		return 0, nil
+
+	// If there are no replicas on the old MSs, the rollout is completed, no deadlock (actually no rollout in progress).
+	if ptr.Deref(mdutil.GetActualReplicaCountForMachineSets(p.oldMSs), 0) == 0 {
+		return
 	}
 
-	log.V(4).Info("Found available machines in deployment, scaling down old MSes", "count", availableMachineCount)
-
-	sort.Sort(mdutil.MachineSetsByCreationTimestamp(p.oldMSs))
-
-	// TODO(in-place): fix this logic
-	// It looks like that the current logic fails when the MD controller is called twice in a row e.g. reconcile of md 6 replicas MaxSurge=3, MaxUnavailable=1 and
-	// ms1, 6/5 replicas << one is scaling down, but scale down not yet processed by the MS controller.
-	// ms2, 3/3 replicas
-	// Leads to:
-	// ms1, 6/1 replicas << it further scaled down by 4, which leads to totAvailable machines is less than MinUnavailable, which should not happen
-	// ms2, 3/3 replicas
+	// If all the replicas on the oldMSs are available, no deadlock (regular scale up of newMS and scale down of oldMSs should take over from here).
+	if ptr.Deref(mdutil.GetActualReplicaCountForMachineSets(p.oldMSs), 0) == ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(p.oldMSs), 0) {
+		return
+	}
 
-	totalScaledDown := int32(0)
-	totalScaleDownCount := availableMachineCount - minAvailable
-	for _, oldMS := range p.oldMSs {
-		if totalScaledDown >= totalScaleDownCount {
-			// No further scaling required.
-			break
+	// If there are scale operations in progress, no deadlock.
+	// Note: we are considering scale operations from both previous and current reconciles.
+	// Note: we are counting only pending scale up & down from the current status.replicas (so actual scale up & down of the replica count, not any other possible "re-alignment" of spec.replicas).
+	for _, ms := range allMSs {
+		scaleIntent := ptr.Deref(ms.Spec.Replicas, 0)
+		if v, ok := p.scaleIntents[ms.Name]; ok {
+			scaleIntent = v
 		}
-
-		oldMSReplicas := ptr.Deref(oldMS.Spec.Replicas, 0)
-		if v, ok := p.scaleIntents[oldMS.Name]; ok {
-			oldMSReplicas = v
+		if scaleIntent != ptr.Deref(ms.Status.Replicas, 0) {
+			return
 		}
+	}
 
-		if oldMSReplicas == 0 {
-			// cannot scale down this MachineSet.
-			continue
-		}
+	// If there are unavailable replicas on the newMS, wait for them to become available first.
+	// Note: a rollout cannot be unblocked if new machines do not become available.
+	// Note: if the replicas on the newMS are not going to become available due to some issue, either:
+	// - automatic remediation can help in addressing temporary failures.
+	// - user intervention is required to fix more permanent issues, e.g. a wrong configuration.
+	if ptr.Deref(p.newMS.Status.AvailableReplicas, 0) != ptr.Deref(p.newMS.Status.Replicas, 0) {
+		return
+	}
 
-		// Scale down.
-		scaleDownCount := min(oldMSReplicas, totalScaleDownCount-totalScaledDown)
-		newReplicasCount := oldMSReplicas - scaleDownCount
-		if newReplicasCount > oldMSReplicas {
-			return totalScaledDown, errors.Errorf("when scaling down old MachineSet, got invalid request to scale down %v: %d -> %d",
-				client.ObjectKeyFromObject(oldMS), oldMSReplicas, newReplicasCount)
+	// At this point we can assume there is a deadlock that can be remediated by breaching the maxUnavailability constraint
+	// and scaling down an oldMS with unavailable machines by one.
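+	// For example (illustrative only): with 3 replicas, maxSurge 0, maxUnavailable 1, an oldMS with 3 replicas of which
+	// only 2 are available and a newMS still at 0 replicas, scale up is blocked by maxSurge and scale down is blocked because
+	// deleting a machine could breach minAvailable; deleting one replica from the oldMS is the only way to let the rollout progress.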
+ // + // Note: in most cases this is only a formal violation of maxUnavailability, because there is a good changes + // that the machine that will be deleted is one of the unavailable machines + for _, oldMS := range p.oldMSs { + if ptr.Deref(oldMS.Status.AvailableReplicas, 0) == ptr.Deref(oldMS.Status.Replicas, 0) || ptr.Deref(oldMS.Spec.Replicas, 0) == 0 { + continue } - log.V(5).Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d)", oldMS.Name, newReplicasCount, scaleDownCount), "MachineSet", klog.KObj(oldMS)) - p.scaleIntents[oldMS.Name] = newReplicasCount - - totalScaledDown += scaleDownCount + newScaleIntent := max(ptr.Deref(oldMS.Spec.Replicas, 0)-1, 0) + log.Info(fmt.Sprintf("Setting scale down intent for %s to %d replicas (-%d) to unblock rollout stuck due to unavailable machine on oldMS only", oldMS.Name, newScaleIntent, 1), "machineset", client.ObjectKeyFromObject(oldMS).String()) + p.scaleIntents[oldMS.Name] = newScaleIntent + return } - - return totalScaledDown, nil } // cleanupDisableMachineCreateAnnotation will remove the disable machine create annotation from new MachineSets that were created during reconcileOldMachineSetsOnDelete. diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go index 31e555e857fd..7579d94d695f 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go @@ -17,6 +17,7 @@ limitations under the License. package machinedeployment import ( + "context" "testing" . "github.com/onsi/gomega" @@ -24,6 +25,7 @@ import ( "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + "sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil" ) func TestReconcileNewMachineSet(t *testing.T) { @@ -267,150 +269,412 @@ func TestReconcileNewMachineSet(t *testing.T) { } } -func TestReconcileOldMachineSets(t *testing.T) { - testCases := []struct { - name string - machineDeployment *clusterv1.MachineDeployment - newMachineSet *clusterv1.MachineSet - oldMachineSets []*clusterv1.MachineSet - expectedOldMachineSetsReplicas int - error error +func Test_reconcileOldMachineSets(t *testing.T) { + var ctx = context.Background() + + tests := []struct { + name string + md *clusterv1.MachineDeployment + scaleIntent map[string]int32 + newMS *clusterv1.MachineSet + oldMSs []*clusterv1.MachineSet + expectScaleIntent map[string]int32 + skipMaxUnavailabilityCheck bool }{ { - name: "RollingUpdate strategy: Scale down old MachineSets when all new replicas are available", - machineDeployment: &clusterv1.MachineDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "bar", - }, - Spec: clusterv1.MachineDeploymentSpec{ - Rollout: clusterv1.MachineDeploymentRolloutSpec{ - Strategy: clusterv1.MachineDeploymentRolloutStrategy{ - Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, - RollingUpdate: clusterv1.MachineDeploymentRolloutStrategyRollingUpdate{ - MaxUnavailable: intOrStrPtr(1), - MaxSurge: intOrStrPtr(3), - }, - }, - }, - Replicas: ptr.To[int32](2), - }, + name: "no op if there are no replicas on old machinesets", + scaleIntent: map[string]int32{}, + md: createMD("v2", 10, 1, 0), + newMS: createMS("ms2", "v2", 10, withStatusReplicas(10), withStatusAvailableReplicas(10)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 0, withStatusReplicas(0), withStatusAvailableReplicas(0)), }, - 
newMachineSet: &clusterv1.MachineSet{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "bar", - }, - Spec: clusterv1.MachineSetSpec{ - Replicas: ptr.To[int32](0), - }, - Status: clusterv1.MachineSetStatus{ - AvailableReplicas: ptr.To[int32](2), - }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): }, - oldMachineSets: []*clusterv1.MachineSet{ - { - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "2replicas", - }, - Spec: clusterv1.MachineSetSpec{ - Replicas: ptr.To[int32](2), - }, - Status: clusterv1.MachineSetStatus{ - AvailableReplicas: ptr.To[int32](2), - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "1replicas", - }, - Spec: clusterv1.MachineSetSpec{ - Replicas: ptr.To[int32](1), - }, - Status: clusterv1.MachineSetStatus{ - AvailableReplicas: ptr.To[int32](1), - }, - }, + }, + { + name: "do not scale down if replicas is equal to minAvailable replicas", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 2 available replicas from ms1 + 1 available replica from ms22 = 3 available replicas <= minAvailability, we cannot scale down }, - expectedOldMachineSetsReplicas: 0, }, { - name: "RollingUpdate strategy: It does not scale down old MachineSets when above maxUnavailable", - machineDeployment: &clusterv1.MachineDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "bar", - }, - Spec: clusterv1.MachineDeploymentSpec{ - Rollout: clusterv1.MachineDeploymentRolloutSpec{ - Strategy: clusterv1.MachineDeploymentRolloutStrategy{ - Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, - RollingUpdate: clusterv1.MachineDeploymentRolloutStrategyRollingUpdate{ - MaxUnavailable: intOrStrPtr(2), - MaxSurge: intOrStrPtr(3), - }, - }, - }, - Replicas: ptr.To[int32](10), - }, + name: "do not scale down if replicas is less then minAvailable replicas", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(1)), }, - newMachineSet: &clusterv1.MachineSet{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "bar", - }, - Spec: clusterv1.MachineSetSpec{ - Replicas: ptr.To[int32](5), - }, - Status: clusterv1.MachineSetStatus{ - Replicas: ptr.To[int32](5), - }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 1 available replicas from ms1 + 1 available replica from ms22 = 3 available replicas = minAvailability, we cannot scale down }, - oldMachineSets: []*clusterv1.MachineSet{ - { - ObjectMeta: metav1.ObjectMeta{ - Namespace: "foo", - Name: "8replicas", - }, - Spec: clusterv1.MachineSetSpec{ - Replicas: ptr.To[int32](8), - }, - Status: clusterv1.MachineSetStatus{ - Replicas: ptr.To[int32](10), - ReadyReplicas: ptr.To[int32](8), - AvailableReplicas: ptr.To[int32](8), - }, - }, + skipMaxUnavailabilityCheck: true, + }, + { + name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from a previous reconcile already takes the availability buffer", + scaleIntent: map[string]int32{}, + md: 
createMD("v2", 3, 1, 0), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(3), withStatusAvailableReplicas(3)), // OldMS is scaling down from a previous reconcile + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 3 available replicas from ms1 - 1 replica already scaling down from ms1 + 1 available replica from ms2 = 3 available replicas = minAvailability, we cannot further scale down + }, + }, + { + name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from a previous reconcile already takes the availability buffer, scale down from a previous reconcile on another MSr", + scaleIntent: map[string]int32{}, + md: createMD("v2", 6, 3, 1), + newMS: createMS("ms3", "v2", 3, withStatusReplicas(0), withStatusAvailableReplicas(0)), // NewMS is scaling up from previous reconcile, but replicas do not exists yet + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(3), withStatusAvailableReplicas(3)), // OldMS is scaling down from a previous reconcile + createMS("ms2", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), // OldMS + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 3 available replicas from ms1 - 1 replica already scaling down from ms1 + 3 available replica from ms2 = 5 available replicas = minAvailability, we cannot further scale down + }, + }, + { + name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from current reconcile already takes the availability buffer", + scaleIntent: map[string]int32{ + "ms2": 1, // newMS (ms2) has a scaling down intent from current reconcile + }, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms2", "v2", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + }, + expectScaleIntent: map[string]int32{ + "ms2": 1, + // no new scale down intent for oldMSs (ms1): + // 2 available replicas from ms1 + 2 available replicas from ms2 - 1 replica already scaling down from ms2 = 3 available replicas = minAvailability, we cannot further scale down + }, + }, + { + name: "do not scale down replicas when there are more replicas than minAvailable replicas, but not all the replicas are available (unavailability on newMS)", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 3 available replicas from ms1 + 0 available replicas from ms2 = 3 available replicas = minAvailability, we cannot further scale down + }, + }, + { + name: "do not scale down replicas when there are more replicas than minAvailable replicas, but not all the replicas are available (unavailability on oldMS)", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(2)), // only 2 replicas are available + 
}, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + // 2 available replicas from ms1 + 1 available replicas from ms2 = 3 available replicas = minAvailability, we cannot further scale down + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, all replicas are available", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs (ms1): + "ms1": 2, // 3 available replicas from ms1 + 1 available replicas from ms2 = 4 available replicas > minAvailability, scale down to 2 replicas (-1) + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + createMS("ms2", "v1", 1, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available + createMS("ms3", "v2", 2, withStatusReplicas(2), withStatusAvailableReplicas(0)), // no replicas are available + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + // ms1 skipped in the first iteration because it does not have any unavailable replica + "ms2": 0, // 0 available replicas from ms2, it can be scaled down with impact on availability (-1) + "ms3": 0, // 0 available replicas from ms3, it can be scaled down with impact on availability (-2) + // no need to further scale down. 
+ }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, available replicas are scaled down when unavailable replicas are gone", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), + createMS("ms2", "v1", 1, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available + createMS("ms3", "v2", 2, withStatusReplicas(2), withStatusAvailableReplicas(0)), // no replicas are available + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + // ms1 skipped in the first iteration because it does not have any unavailable replica + "ms2": 0, // 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-1) + "ms3": 0, // 0 available replicas from ms3, it can be scaled down to 0 without any impact on availability (-1) + "ms1": 2, // 3 available replicas from ms2 + 1 available replica from ms4 = 4 available replicas > minAvailability, scale down to 2 replicas (-1) + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, available replicas are scaled down when unavailable replicas are gone is not affected by replicas without machines", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 4, withStatusReplicas(3), withStatusAvailableReplicas(3)), // 1 replica without machine + createMS("ms2", "v1", 2, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available, 1 replica without machine + createMS("ms3", "v2", 5, withStatusReplicas(2), withStatusAvailableReplicas(0)), // no replicas are available, 2 replicas without machines + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + // ms1 skipped in the first iteration because it does not have any unavailable replica + "ms2": 0, // 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-1) + "ms3": 0, // 0 available replicas from ms3, it can be scaled down to 0 without any impact on availability (-5) + "ms1": 2, // 3 available replicas from ms2 + 1 available replica from ms4 = 4 available replicas > minAvailability, scale down to 2 replicas, also drop the replica without machines (-2) + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, scale down stops before breaching minAvailable replicas", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + createMS("ms2", "v1", 1, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available + createMS("ms3", "v2", 2, withStatusReplicas(2), withStatusAvailableReplicas(1)), // only 1 replica is available + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + "ms2": 0, // 0 available replicas from ms2, it can be scaled down to 0 
without any impact on availability (-1) + // even if there is still room to scale down, we cannot scale down ms3: 1 available replica from ms1 + 1 available replica from ms4 = 2 available replicas < minAvailability + // does not make sense to continue scale down. + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, scale down stops before breaching minAvailable replicas is not affected by replicas without machines", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 2, withStatusReplicas(1), withStatusAvailableReplicas(1)), // 1 replica without machine + createMS("ms2", "v1", 3, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available, 2 replica without machines + createMS("ms3", "v2", 3, withStatusReplicas(2), withStatusAvailableReplicas(1)), // only 1 replica is available, 1 replica without machine + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + "ms1": 1, // 1 replica without machine, it can be scaled down to 1 without any impact on availability (-1) + "ms2": 0, // 1 replica without machine, 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-2) + "ms3": 2, // 1 replica without machine, it can be scaled down to 2 without any impact on availability (-1); even if there is still room to scale down, we cannot further scale down ms3: 1 available replica from ms1 + 1 available replica from ms4 = 2 available replicas < minAvailability + // does not make sense to continue scale down. + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, scale down keeps into account scale downs from a previous reconcile", + scaleIntent: map[string]int32{}, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms2", "v3", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 3, withStatusReplicas(4), withStatusAvailableReplicas(4)), // OldMS is scaling down from a previous reconcile + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + "ms1": 2, // 4 available replicas from ms1 - 1 replica already scaling down from ms1 + 2 available replicas from ms2 = 4 available replicas > minAvailability, scale down to 2 (-1) + }, + }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, scale down keeps into account scale downs from the current reconcile", + scaleIntent: map[string]int32{ + "ms2": 1, // newMS (ms2) has a scaling down intent from current reconcile + }, + md: createMD("v2", 3, 1, 0), + newMS: createMS("ms2", "v3", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), + }, + expectScaleIntent: map[string]int32{ + "ms2": 1, + // new scale down intent for oldMSs: + "ms1": 2, // 3 available replicas from ms1 + 2 available replicas from ms2 - 1 replica already scaling down from ms2 = 4 available replicas > minAvailability, scale down to 2 (-1) }, - expectedOldMachineSetsReplicas: 8, }, } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) - planner := 
newRolloutPlanner() - planner.md = tc.machineDeployment - planner.newMS = tc.newMachineSet - planner.oldMSs = tc.oldMachineSets - - err := planner.reconcileOldMachineSets(ctx) - if tc.error != nil { - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(BeEquivalentTo(tc.error.Error())) - return + p := &rolloutPlanner{ + md: tt.md, + newMS: tt.newMS, + oldMSs: tt.oldMSs, + scaleIntents: tt.scaleIntent, } - + err := p.reconcileOldMachineSets(ctx) g.Expect(err).ToNot(HaveOccurred()) - for i := range tc.oldMachineSets { - scaleIntent := ptr.Deref(tc.oldMachineSets[i].Spec.Replicas, 0) - if v, ok := planner.scaleIntents[tc.oldMachineSets[i].Name]; ok { - scaleIntent = v + g.Expect(p.scaleIntents).To(Equal(tt.expectScaleIntent), "unexpected scaleIntents") + + // Check we are not breaching minAvailability by simulating what will happen by applying intent + worst scenario when a machine deletion is always an available machine deletion. + for _, oldMS := range tt.oldMSs { + scaleIntent, ok := p.scaleIntents[oldMS.Name] + if !ok { + continue + } + machineScaleDown := max(ptr.Deref(oldMS.Status.Replicas, 0)-scaleIntent, 0) + if machineScaleDown > 0 { + oldMS.Status.AvailableReplicas = ptr.To(max(ptr.Deref(oldMS.Status.AvailableReplicas, 0)-machineScaleDown, 0)) } - g.Expect(scaleIntent).To(BeEquivalentTo(tc.expectedOldMachineSetsReplicas)) } + minAvailableReplicas := ptr.Deref(tt.md.Spec.Replicas, 0) - mdutil.MaxUnavailable(*tt.md) + totAvailableReplicas := ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(append(tt.oldMSs, tt.newMS)), 0) + if !tt.skipMaxUnavailabilityCheck { + g.Expect(totAvailableReplicas).To(BeNumerically(">=", minAvailableReplicas), "totAvailable machines is less than MinUnavailable") + } else { + t.Logf("skipping MaxUnavailability check (totAvailableReplicas: %d, minAvailableReplicas: %d)", totAvailableReplicas, minAvailableReplicas) + } + }) + } +} + +func Test_reconcileDeadlockBreaker(t *testing.T) { + var ctx = context.Background() + + tests := []struct { + name string + scaleIntent map[string]int32 + newMS *clusterv1.MachineSet + oldMSs []*clusterv1.MachineSet + expectScaleIntent map[string]int32 + skipMaxUnavailabilityCheck bool + }{ + { + name: "no op if there are no replicas on old machinesets", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 10, withStatusReplicas(10), withStatusAvailableReplicas(10)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 0, withStatusReplicas(0), withStatusAvailableReplicas(0)), + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + }, + }, + { + name: "no op if all the replicas on OldMS are available", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + }, + }, + { + name: "no op if there are scale operation still in progress from a previous reconcile on newMS", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 6, withStatusReplicas(5), withStatusAvailableReplicas(5)), // scale up from a previous reconcile + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // one unavailable replica, potential deadlock + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + }, + 
}, + { + name: "no op if there are scale operation still in progress from a previous reconcile on oldMS", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 4, withStatusReplicas(5), withStatusAvailableReplicas(4)), // one unavailable replica, potential deadlock, scale down from a previous reconcile + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + }, + }, + { + name: "no op if there are scale operation from the current reconcile on newMS", + scaleIntent: map[string]int32{ + "ms2": 6, // scale up intent for newMS + }, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // one unavailable replica, potential deadlock + }, + expectScaleIntent: map[string]int32{ + "ms2": 6, + // no new scale down intent for oldMSs (ms1): + }, + }, + { + name: "no op if there are scale operation from the current reconcile on oldMS", + scaleIntent: map[string]int32{ + "ms1": 4, // scale up intent for oldMS + }, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // one unavailable replica, potential deadlock + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + "ms1": 4, + }, + }, + { + name: "wait for unavailable replicas on the newMS if any", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(3)), // one unavailable replica + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // one unavailable replica, potential deadlock + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs (ms1): + }, + }, + { + name: "unblock a deadlock when necessary", + scaleIntent: map[string]int32{}, + newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(3)), // one unavailable replica, potential deadlock + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs (ms1): + "ms1": 4, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + p := &rolloutPlanner{ + newMS: tt.newMS, + oldMSs: tt.oldMSs, + scaleIntents: tt.scaleIntent, + } + p.reconcileDeadlockBreaker(ctx) + g.Expect(p.scaleIntents).To(Equal(tt.expectScaleIntent), "unexpected scaleIntents") }) } } diff --git a/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go b/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go index 6c890235d647..d4aeb9d754dd 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go @@ -28,6 +28,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" @@ -107,9 +108,10 @@ type rolloutSequenceTestCase struct { seed int64 } -func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { +func Test_rolloutRollingSequences(t *testing.T) { ctx := context.Background() ctx = ctrl.LoggerInto(ctx, klog.Background()) + klog.SetOutput(ginkgo.GinkgoWriter) tests := []rolloutSequenceTestCase{ // Regular rollout (no in-place) @@ -249,9 +251,8 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { } testWithPredictableReconcileOrder := true - // TODO(in-place): enable tests with random reconcile order as soon as the issues in reconcileOldMachineSets are fixed - testWithRandomReconcileOrderFromConstantSeed := false - testWithRandomReconcileOrderFromRandomSeed := false + testWithRandomReconcileOrderFromConstantSeed := true + testWithRandomReconcileOrderFromRandomSeed := true for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -260,9 +261,7 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { if testWithPredictableReconcileOrder { tt.maxIterations = 50 tt.randomControllerOrder = false - if tt.logAndGoldenFileName == "" { - tt.logAndGoldenFileName = strings.ToLower(tt.name) - } + tt.logAndGoldenFileName = strings.ToLower(tt.name) t.Run("default", func(t *testing.T) { runTestCase(ctx, t, tt) }) @@ -273,11 +272,7 @@ func Test_rolloutSequencesWithPredictableReconcileOrder(t *testing.T) { tt.name = fmt.Sprintf("%s, random(0)", name) tt.randomControllerOrder = true tt.seed = 0 - // TODO(in-place): drop the following line as soon as issue with scale down are fixed - tt.skipLogToFileAndGoldenFileCheck = true - if tt.logAndGoldenFileName == "" { - tt.logAndGoldenFileName = strings.ToLower(tt.name) - } + tt.logAndGoldenFileName = strings.ToLower(tt.name) t.Run("random(0)", func(t *testing.T) { runTestCase(ctx, t, tt) }) @@ -305,7 +300,7 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase) rng := rand.New(rand.NewSource(tt.seed)) //nolint:gosec // it is ok to use a weak randomizer here fLogger := newFileLogger(t, tt.name, fmt.Sprintf("testdata/%s", tt.logAndGoldenFileName)) - // uncomment this line to automatically generate/update golden files: fLogger.writeGoldenFile = true + fLogger.writeGoldenFile = true // Init current and desired state from test case current := tt.currentScope.Clone() @@ -888,8 +883,14 @@ func createMD(failureDomain string, replicas int32, maxSurge, maxUnavailable int } } -func createMS(name, failureDomain string, replicas int32) *clusterv1.MachineSet { - return &clusterv1.MachineSet{ +func withStatusAvailableReplicas(r int32) fakeMachineSetOption { + return func(ms *clusterv1.MachineSet) { + ms.Status.AvailableReplicas = ptr.To(r) + } +} + +func createMS(name, failureDomain string, replicas int32, opts ...fakeMachineSetOption) *clusterv1.MachineSet { + ms := &clusterv1.MachineSet{ ObjectMeta: metav1.ObjectMeta{ Name: name, }, @@ -903,6 +904,10 @@ func createMS(name, failureDomain string, replicas int32) *clusterv1.MachineSet AvailableReplicas: ptr.To(replicas), }, } + for _, opt := range opts { + opt(ms) + } + return ms } func createM(name, ownedByMS, failureDomain string) *clusterv1.Machine { diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 12 replicas, maxsurge 
3, maxunavailable 1, scale down to 6, random(0).test.log.golden new file mode 100644 index 000000000000..eaa1068c4f5e --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6, random(0).test.log.golden @@ -0,0 +1,59 @@ +## Regular rollout, 12 Replicas, maxSurge 3, maxUnavailable 1, scale down to 6, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 9/9 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12) + - ms2, 3/3 replicas (m13,m14,m15) +[Test] Rollout 12 replicas, MaxSurge=3, MaxUnavailable=1, random(0) +[MS controller] Iteration 1, Reconcile ms2, 3/3 replicas (m13,m14,m15) +[MS controller] Iteration 1, Reconcile ms1, 9/9 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 9/9 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12) + - ms2, 3/3 replicas (m13,m14,m15) +[MD controller] - Result of rollout planner + md, 12/6 replicas + - ms1, 9/2 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12) + - ms2, 3/3 replicas (m13,m14,m15) +[Toleration] tolerate maxSurge breach +[MS controller] Iteration 2, Reconcile ms1, 9/2 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12) +[MS controller] - ms1 scale down to 2/2 replicas (m4,m5,m6,m7,m8,m9,m10 deleted) +[MS controller] Iteration 2, Reconcile ms1, 2/2 replicas (m11,m12) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 12/6 replicas + - ms1, 2/2 replicas (m11,m12) + - ms2, 3/3 replicas (m13,m14,m15) +[MD controller] - Result of rollout planner + md, 5/6 replicas + - ms1, 2/2 replicas (m11,m12) + - ms2, 3/6 replicas (m13,m14,m15) +[MS controller] Iteration 3, Reconcile ms2, 3/6 replicas (m13,m14,m15) +[MS controller] - ms2 scale up to 6/6 replicas (m16,m17,m18 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 5/6 replicas + - ms1, 2/2 replicas (m11,m12) + - ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[MD controller] - Result of rollout planner + md, 8/6 replicas + - ms1, 2/0 replicas (m11,m12) + - ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[MS controller] Iteration 4, Reconcile ms1, 2/0 replicas (m11,m12) +[MS controller] - ms1 scale down to 0/0 replicas (m11,m12 deleted) +[MS controller] Iteration 4, Reconcile ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[MS controller] Iteration 5, Reconcile ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 8/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m13,m14,m15,m16,m17,m18) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 0, maxunavailable 1, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 0, maxunavailable 1, random(0).test.log.golden new file mode 100644 index 000000000000..5a971c842f9d --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 0, maxunavailable 1, random(0).test.log.golden @@ -0,0 +1,105 @@ +## Regular rollout, 3 Replicas, maxSurge 0, maxUnavailable 1, random(0) + +[Test] Initial state + md, 3/3 replicas + - ms1, 3/3 replicas 
(m1,m2,m3) + - ms2, 0/0 replicas () +[Test] Rollout 3 replicas, MaxSurge=0, MaxUnavailable=1, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 3/3 replicas (m1,m2,m3) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 3/2 replicas (m1,m2,m3) + - ms2, 0/0 replicas () +[MS controller] Iteration 2, Reconcile ms1, 3/2 replicas (m1,m2,m3) +[MS controller] - ms1 scale down to 2/2 replicas (m1 deleted) +[MS controller] Iteration 2, Reconcile ms1, 2/2 replicas (m2,m3) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 2/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 0/1 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/1 replicas () +[MS controller] - ms2 scale up to 1/1 replicas (m4 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 2/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 1/1 replicas (m4) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 2/1 replicas (m2,m3) + - ms2, 1/1 replicas (m4) +[MS controller] Iteration 4, Reconcile ms1, 2/1 replicas (m2,m3) +[MS controller] - ms1 scale down to 1/1 replicas (m2 deleted) +[MS controller] Iteration 4, Reconcile ms2, 1/1 replicas (m4) +[MS controller] Iteration 5, Reconcile ms2, 1/1 replicas (m4) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 1/1 replicas (m4) +[MD controller] - Result of rollout planner + md, 2/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 1/2 replicas (m4) +[MS controller] Iteration 6, Reconcile ms2, 1/2 replicas (m4) +[MS controller] - ms2 scale up to 2/2 replicas (m5 created) +[MD controller] Iteration 7, Reconcile md +[MD controller] - Input to rollout planner + md, 2/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 7, Reconcile ms2, 2/2 replicas (m4,m5) +[MD controller] Iteration 8, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 8, Reconcile ms1, 1/0 replicas (m3) +[MS controller] - ms1 scale down to 0/0 replicas (m3 deleted) +[MS controller] Iteration 9, Reconcile ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 9, Reconcile ms1, 0/0 replicas () +[MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 0/0 replicas () + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner + md, 2/3 replicas + - ms1, 0/0 replicas () + - ms2, 2/3 replicas (m4,m5) +[MS controller] Iteration 10, Reconcile ms2, 2/3 replicas (m4,m5) +[MS controller] - ms2 scale up to 3/3 replicas (m6 created) +[MS controller] Iteration 10, Reconcile ms1, 0/0 replicas () +[MS controller] Iteration 11, Reconcile ms2, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 11, Reconcile md +[MD controller] - 
Input to rollout planner + md, 2/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) +[Test] Final state + md, 3/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 1, maxunavailable 0, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 1, maxunavailable 0, random(0).test.log.golden new file mode 100644 index 000000000000..5045410bc79a --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 3 replicas, maxsurge 1, maxunavailable 0, random(0).test.log.golden @@ -0,0 +1,136 @@ +## Regular rollout, 3 Replicas, maxSurge 1, maxUnavailable 0, random(0) + +[Test] Initial state + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/0 replicas () +[Test] Rollout 3 replicas, MaxSurge=1, MaxUnavailable=0, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 3/3 replicas (m1,m2,m3) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/1 replicas () +[MS controller] Iteration 2, Reconcile ms1, 3/3 replicas (m1,m2,m3) +[MS controller] Iteration 2, Reconcile ms1, 3/3 replicas (m1,m2,m3) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/1 replicas () +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 0/1 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/1 replicas () +[MS controller] - ms2 scale up to 1/1 replicas (m4 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 3/3 replicas (m1,m2,m3) + - ms2, 1/1 replicas (m4) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 3/2 replicas (m1,m2,m3) + - ms2, 1/1 replicas (m4) +[MS controller] Iteration 4, Reconcile ms1, 3/2 replicas (m1,m2,m3) +[MS controller] - ms1 scale down to 2/2 replicas (m1 deleted) +[MS controller] Iteration 4, Reconcile ms2, 1/1 replicas (m4) +[MS controller] Iteration 5, Reconcile ms2, 1/1 replicas (m4) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 1/1 replicas (m4) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 1/2 replicas (m4) +[MS controller] Iteration 6, Reconcile ms2, 1/2 replicas (m4) +[MS controller] - ms2 scale up to 2/2 replicas (m5 created) +[MD controller] Iteration 7, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 2/2 replicas (m2,m3) + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 2/1 replicas (m2,m3) + - ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 7, Reconcile ms2, 2/2 replicas (m4,m5) +[MD controller] Iteration 8, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 2/1 replicas (m2,m3) + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner 
+ md, 4/3 replicas + - ms1, 2/1 replicas (m2,m3) + - ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 8, Reconcile ms1, 2/1 replicas (m2,m3) +[MS controller] - ms1 scale down to 1/1 replicas (m2 deleted) +[MS controller] Iteration 9, Reconcile ms2, 2/2 replicas (m4,m5) +[MS controller] Iteration 9, Reconcile ms1, 1/1 replicas (m3) +[MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 2/2 replicas (m4,m5) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 2/3 replicas (m4,m5) +[MS controller] Iteration 10, Reconcile ms2, 2/3 replicas (m4,m5) +[MS controller] - ms2 scale up to 3/3 replicas (m6 created) +[MS controller] Iteration 10, Reconcile ms1, 1/1 replicas (m3) +[MS controller] Iteration 11, Reconcile ms2, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 11, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 1/1 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 12, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 14, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 1/0 replicas (m3) + - ms2, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 15, Reconcile ms2, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 16, Reconcile ms2, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 16, Reconcile ms2, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 16, Reconcile ms1, 1/0 replicas (m3) +[MS controller] - ms1 scale down to 0/0 replicas (m3 deleted) +[MD controller] Iteration 16, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) +[Test] Final state + md, 3/3 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m4,m5,m6) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 0, maxunavailable 10, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 0, maxunavailable 10, random(0).test.log.golden new file mode 100644 index 000000000000..87e5e2c9323a --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 0, maxunavailable 10, random(0).test.log.golden @@ -0,0 +1,47 @@ +## Regular rollout, 6 Replicas, maxSurge 0, maxUnavailable 10, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[Test] Rollout 6 replicas, MaxSurge=0, MaxUnavailable=10, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[MD 
controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 6/0 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[MS controller] Iteration 2, Reconcile ms1, 6/0 replicas (m1,m2,m3,m4,m5,m6) +[MS controller] - ms1 scale down to 0/0 replicas (m1,m2,m3,m4,m5,m6 deleted) +[MS controller] Iteration 2, Reconcile ms1, 0/0 replicas () +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 0/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/6 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/6 replicas () +[MS controller] - ms2 scale up to 6/6 replicas (m7,m8,m9,m10,m11,m12 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 0/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 4, Reconcile ms1, 0/0 replicas () +[MS controller] Iteration 4, Reconcile ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 1, maxunavailable 3, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 1, maxunavailable 3, random(0).test.log.golden new file mode 100644 index 000000000000..e0ed895585e0 --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 1, maxunavailable 3, random(0).test.log.golden @@ -0,0 +1,70 @@ +## Regular rollout, 6 Replicas, maxSurge 1, maxUnavailable 3, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[Test] Rollout 6 replicas, MaxSurge=1, MaxUnavailable=3, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 6/3 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/1 replicas () +[MS controller] Iteration 2, Reconcile ms1, 6/3 replicas (m1,m2,m3,m4,m5,m6) +[MS controller] - ms1 scale down to 3/3 replicas (m1,m2,m3 deleted) +[MS controller] Iteration 2, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 0/1 replicas () +[MD controller] - Result of rollout planner + md, 3/6 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 0/4 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/4 replicas () +[MS controller] - ms2 scale up to 4/4 replicas (m7,m8,m9,m10 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 3/6 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] - Result of rollout planner + md, 7/6 replicas + - ms1, 3/0 replicas (m4,m5,m6) + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MS controller] Iteration 4, Reconcile ms1, 3/0 replicas (m4,m5,m6) +[MS controller] - ms1 scale 
down to 0/0 replicas (m4,m5,m6 deleted) +[MS controller] Iteration 4, Reconcile ms2, 4/4 replicas (m7,m8,m9,m10) +[MS controller] Iteration 5, Reconcile ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas () + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] - Result of rollout planner + md, 4/6 replicas + - ms1, 0/0 replicas () + - ms2, 4/6 replicas (m7,m8,m9,m10) +[MS controller] Iteration 6, Reconcile ms2, 4/6 replicas (m7,m8,m9,m10) +[MS controller] - ms2 scale up to 6/6 replicas (m11,m12 created) +[MD controller] Iteration 7, Reconcile md +[MD controller] - Input to rollout planner + md, 4/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 7, Reconcile ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 10, maxunavailable 0, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 10, maxunavailable 0, random(0).test.log.golden new file mode 100644 index 000000000000..067f8cb229e8 --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 10, maxunavailable 0, random(0).test.log.golden @@ -0,0 +1,57 @@ +## Regular rollout, 6 Replicas, maxSurge 10, maxUnavailable 0, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[Test] Rollout 6 replicas, MaxSurge=10, MaxUnavailable=0, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/6 replicas () +[MS controller] Iteration 2, Reconcile ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) +[MS controller] Iteration 2, Reconcile ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/6 replicas () +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/6 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/6 replicas () +[MS controller] - ms2 scale up to 6/6 replicas (m7,m8,m9,m10,m11,m12 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 12/6 replicas + - ms1, 6/0 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 4, Reconcile ms1, 6/0 replicas (m1,m2,m3,m4,m5,m6) +[MS controller] - ms1 scale down to 0/0 replicas (m1,m2,m3,m4,m5,m6 deleted) +[MS controller] Iteration 4, Reconcile ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 5, Reconcile ms2, 6/6 
replicas (m7,m8,m9,m10,m11,m12) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 12/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, change spec, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, change spec, random(0).test.log.golden new file mode 100644 index 000000000000..d2aad77f1860 --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, change spec, random(0).test.log.golden @@ -0,0 +1,123 @@ +## Regular rollout, 6 Replicas, maxSurge 3, maxUnavailable 1, change spec, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 0/0 replicas () +[Test] Rollout 6 replicas, MaxSurge=3, MaxUnavailable=1, random(0) +[MS controller] Iteration 1, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 1, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 1, Reconcile ms3, 0/0 replicas () +[MS controller] Iteration 2, Reconcile ms3, 0/0 replicas () +[MS controller] Iteration 3, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 3, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 3, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 3, Reconcile ms3, 0/0 replicas () +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 3/2 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 0/3 replicas () +[MS controller] Iteration 3, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 3, Reconcile ms3, 0/3 replicas () +[MS controller] - ms3 scale up to 3/3 replicas (m10,m11,m12 created) +[MS controller] Iteration 3, Reconcile ms3, 3/3 replicas (m10,m11,m12) +[MS controller] Iteration 4, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 4, Reconcile ms3, 3/3 replicas (m10,m11,m12) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 3/2 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 3/3 replicas (m10,m11,m12) +[MD controller] - Result of rollout planner + md, 9/6 replicas + - ms1, 3/0 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 3/3 replicas (m10,m11,m12) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 9/6 replicas + - ms1, 3/0 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 3/3 replicas (m10,m11,m12) +[MD controller] - Result of rollout planner + md, 9/6 replicas + - ms1, 3/0 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 3/3 replicas (m10,m11,m12) +[MS controller] Iteration 5, Reconcile ms3, 3/3 replicas (m10,m11,m12) +[MS controller] Iteration 5, Reconcile ms1, 3/0 replicas (m4,m5,m6) +[MS controller] - ms1 scale down to 0/0 replicas (m4,m5,m6 deleted) +[MS controller] 
Iteration 5, Reconcile ms3, 3/3 replicas (m10,m11,m12) +[MD controller] Iteration 6, Reconcile md +[MD controller] - Input to rollout planner + md, 9/6 replicas + - ms1, 0/0 replicas () + - ms2, 3/3 replicas (m7,m8,m9) + - ms3, 3/3 replicas (m10,m11,m12) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 3/2 replicas (m7,m8,m9) + - ms3, 3/6 replicas (m10,m11,m12) +[MS controller] Iteration 6, Reconcile ms3, 3/6 replicas (m10,m11,m12) +[MS controller] - ms3 scale up to 6/6 replicas (m13,m14,m15 created) +[MS controller] Iteration 6, Reconcile ms2, 3/2 replicas (m7,m8,m9) +[MS controller] - ms2 scale down to 2/2 replicas (m7 deleted) +[MS controller] Iteration 6, Reconcile ms1, 0/0 replicas () +[MS controller] Iteration 7, Reconcile ms2, 2/2 replicas (m8,m9) +[MS controller] Iteration 7, Reconcile ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MS controller] Iteration 8, Reconcile ms1, 0/0 replicas () +[MD controller] Iteration 8, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 2/2 replicas (m8,m9) + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MD controller] - Result of rollout planner + md, 8/6 replicas + - ms1, 0/0 replicas () + - ms2, 2/0 replicas (m8,m9) + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MS controller] Iteration 8, Reconcile ms1, 0/0 replicas () +[MS controller] Iteration 8, Reconcile ms2, 2/0 replicas (m8,m9) +[MS controller] - ms2 scale down to 0/0 replicas (m8,m9 deleted) +[MS controller] Iteration 9, Reconcile ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MS controller] Iteration 9, Reconcile ms2, 0/0 replicas () +[MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 8/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) +[MS controller] Iteration 9, Reconcile ms1, 0/0 replicas () +[MS controller] Iteration 9, Reconcile ms1, 0/0 replicas () +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 0/0 replicas () + - ms3, 6/6 replicas (m10,m11,m12,m13,m14,m15) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, random(0).test.log.golden new file mode 100644 index 000000000000..d2560cd9f190 --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, random(0).test.log.golden @@ -0,0 +1,92 @@ +## Regular rollout, 6 Replicas, maxSurge 3, maxUnavailable 1, random(0) + +[Test] Initial state + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[Test] Rollout 6 replicas, MaxSurge=3, MaxUnavailable=1, random(0) +[MS controller] Iteration 1, Reconcile ms2, 0/0 replicas () +[MS controller] Iteration 1, Reconcile ms1, 6/6 replicas 
(m1,m2,m3,m4,m5,m6) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 6/6 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/0 replicas () +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 6/5 replicas (m1,m2,m3,m4,m5,m6) + - ms2, 0/3 replicas () +[MS controller] Iteration 2, Reconcile ms1, 6/5 replicas (m1,m2,m3,m4,m5,m6) +[MS controller] - ms1 scale down to 5/5 replicas (m1 deleted) +[MS controller] Iteration 2, Reconcile ms1, 5/5 replicas (m2,m3,m4,m5,m6) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/6 replicas + - ms1, 5/5 replicas (m2,m3,m4,m5,m6) + - ms2, 0/3 replicas () +[MD controller] - Result of rollout planner + md, 5/6 replicas + - ms1, 5/5 replicas (m2,m3,m4,m5,m6) + - ms2, 0/4 replicas () +[MS controller] Iteration 3, Reconcile ms2, 0/4 replicas () +[MS controller] - ms2 scale up to 4/4 replicas (m7,m8,m9,m10 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 5/6 replicas + - ms1, 5/5 replicas (m2,m3,m4,m5,m6) + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] - Result of rollout planner + md, 9/6 replicas + - ms1, 5/1 replicas (m2,m3,m4,m5,m6) + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MS controller] Iteration 4, Reconcile ms1, 5/1 replicas (m2,m3,m4,m5,m6) +[MS controller] - ms1 scale down to 1/1 replicas (m2,m3,m4,m5 deleted) +[MS controller] Iteration 4, Reconcile ms2, 4/4 replicas (m7,m8,m9,m10) +[MS controller] Iteration 5, Reconcile ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 9/6 replicas + - ms1, 1/1 replicas (m6) + - ms2, 4/4 replicas (m7,m8,m9,m10) +[MD controller] - Result of rollout planner + md, 5/6 replicas + - ms1, 1/1 replicas (m6) + - ms2, 4/6 replicas (m7,m8,m9,m10) +[MS controller] Iteration 6, Reconcile ms2, 4/6 replicas (m7,m8,m9,m10) +[MS controller] - ms2 scale up to 6/6 replicas (m11,m12 created) +[MD controller] Iteration 7, Reconcile md +[MD controller] - Input to rollout planner + md, 5/6 replicas + - ms1, 1/1 replicas (m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 7/6 replicas + - ms1, 1/0 replicas (m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 7, Reconcile ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] Iteration 8, Reconcile md +[MD controller] - Input to rollout planner + md, 7/6 replicas + - ms1, 1/0 replicas (m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 7/6 replicas + - ms1, 1/0 replicas (m6) + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 8, Reconcile ms1, 1/0 replicas (m6) +[MS controller] - ms1 scale down to 0/0 replicas (m6 deleted) +[MS controller] Iteration 9, Reconcile ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MS controller] Iteration 9, Reconcile ms1, 0/0 replicas () +[MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[MD controller] - Result of rollout planner + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) +[Test] Final state + md, 6/6 replicas + - ms1, 0/0 replicas () + - ms2, 6/6 replicas (m7,m8,m9,m10,m11,m12) diff --git a/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, scale up to 12, 
random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, scale up to 12, random(0).test.log.golden new file mode 100644 index 000000000000..f2d37c25a933 --- /dev/null +++ b/internal/controllers/machinedeployment/testdata/regular rollout, 6 replicas, maxsurge 3, maxunavailable 1, scale up to 12, random(0).test.log.golden @@ -0,0 +1,59 @@ +## Regular rollout, 6 Replicas, maxSurge 3, maxUnavailable 1, scale up to 12, random(0) + +[Test] Initial state + md, 12/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) +[Test] Rollout 6 replicas, MaxSurge=3, MaxUnavailable=1, random(0) +[MS controller] Iteration 1, Reconcile ms2, 3/3 replicas (m7,m8,m9) +[MS controller] Iteration 1, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 1, Reconcile md +[MD controller] - Input to rollout planner + md, 12/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/3 replicas (m7,m8,m9) +[MD controller] - Result of rollout planner + md, 6/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/12 replicas (m7,m8,m9) +[Toleration] tolerate maxUnavailable breach +[MS controller] Iteration 2, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MS controller] Iteration 2, Reconcile ms1, 3/3 replicas (m4,m5,m6) +[MD controller] Iteration 3, Reconcile md +[MD controller] - Input to rollout planner + md, 6/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/12 replicas (m7,m8,m9) +[MD controller] - Result of rollout planner + md, 6/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 3/12 replicas (m7,m8,m9) +[Toleration] tolerate maxUnavailable breach +[MS controller] Iteration 3, Reconcile ms2, 3/12 replicas (m7,m8,m9) +[MS controller] - ms2 scale up to 12/12 replicas (m10,m11,m12,m13,m14,m15,m16,m17,m18 created) +[MD controller] Iteration 4, Reconcile md +[MD controller] - Input to rollout planner + md, 6/12 replicas + - ms1, 3/3 replicas (m4,m5,m6) + - ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[MD controller] - Result of rollout planner + md, 15/12 replicas + - ms1, 3/0 replicas (m4,m5,m6) + - ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[MS controller] Iteration 4, Reconcile ms1, 3/0 replicas (m4,m5,m6) +[MS controller] - ms1 scale down to 0/0 replicas (m4,m5,m6 deleted) +[MS controller] Iteration 4, Reconcile ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[MS controller] Iteration 5, Reconcile ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 15/12 replicas + - ms1, 0/0 replicas () + - ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[MD controller] - Result of rollout planner + md, 12/12 replicas + - ms1, 0/0 replicas () + - ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) +[Test] Final state + md, 12/12 replicas + - ms1, 0/0 replicas () + - ms2, 12/12 replicas (m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18) From 142d1de7defd8bbeae85505e65a0c6b2186035c6 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Sat, 4 Oct 2025 13:39:03 +0200 Subject: [PATCH 2/5] Minor improvements --- .../machinedeployment_rolling.go | 10 +- .../machinedeployment_rolling_test.go | 36 +++---- ...machinedeployment_rollout_sequence_test.go | 97 +++++++++++-------- 3 files changed, 82 insertions(+), 61 deletions(-) diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling.go 
b/internal/controllers/machinedeployment/machinedeployment_rolling.go index 85f10cbc3b00..6e75fe8df344 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling.go @@ -59,7 +59,7 @@ func (r *Reconciler) rolloutRolling(ctx context.Context, md *clusterv1.MachineDe planner.newMS = newMS planner.oldMSs = oldMSs - if err := planner.Plan(ctx); err != nil { + if err := planner.planRolloutRolling(ctx); err != nil { return err } @@ -100,8 +100,8 @@ func newRolloutPlanner() *rolloutPlanner { } } -// Plan determine how to proceed with the rollout if we are not yet at the desired state. -func (p *rolloutPlanner) Plan(ctx context.Context) error { +// planRolloutRolling determine how to proceed with the rollout when using the RolloutRolling strategy if we are not yet at the desired state. +func (p *rolloutPlanner) planRolloutRolling(ctx context.Context) error { if p.md.Spec.Replicas == nil { return errors.Errorf("spec.replicas for MachineDeployment %v is nil, this is unexpected", client.ObjectKeyFromObject(p.md)) } @@ -122,7 +122,7 @@ func (p *rolloutPlanner) Plan(ctx context.Context) error { } // Scale down, if we can. - if err := p.reconcileOldMachineSets(ctx); err != nil { + if err := p.reconcileOldMachineSetsRolloutRolling(ctx); err != nil { return err } @@ -168,7 +168,7 @@ func (p *rolloutPlanner) reconcileNewMachineSet(ctx context.Context) error { return nil } -func (p *rolloutPlanner) reconcileOldMachineSets(ctx context.Context) error { +func (p *rolloutPlanner) reconcileOldMachineSetsRolloutRolling(ctx context.Context) error { allMSs := append(p.oldMSs, p.newMS) // no op if there are no replicas on old machinesets diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go index 7579d94d695f..85544da4574d 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go @@ -269,7 +269,7 @@ func TestReconcileNewMachineSet(t *testing.T) { } } -func Test_reconcileOldMachineSets(t *testing.T) { +func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { var ctx = context.Background() tests := []struct { @@ -284,7 +284,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "no op if there are no replicas on old machinesets", scaleIntent: map[string]int32{}, - md: createMD("v2", 10, 1, 0), + md: createMD("v2", 10, withRolloutStrategy(1, 0)), newMS: createMS("ms2", "v2", 10, withStatusReplicas(10), withStatusAvailableReplicas(10)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v1", 0, withStatusReplicas(0), withStatusAvailableReplicas(0)), @@ -296,7 +296,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "do not scale down if replicas is equal to minAvailable replicas", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), @@ -309,7 +309,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "do not scale down if replicas is less then minAvailable replicas", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms2", "v2", 1, 
withStatusReplicas(1), withStatusAvailableReplicas(1)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(1)), @@ -323,7 +323,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from a previous reconcile already takes the availability buffer", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v1", 2, withStatusReplicas(3), withStatusAvailableReplicas(3)), // OldMS is scaling down from a previous reconcile @@ -336,7 +336,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from a previous reconcile already takes the availability buffer, scale down from a previous reconcile on another MSr", scaleIntent: map[string]int32{}, - md: createMD("v2", 6, 3, 1), + md: createMD("v2", 6, withRolloutStrategy(3, 1)), newMS: createMS("ms3", "v2", 3, withStatusReplicas(0), withStatusAvailableReplicas(0)), // NewMS is scaling up from previous reconcile, but replicas do not exists yet oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v1", 2, withStatusReplicas(3), withStatusAvailableReplicas(3)), // OldMS is scaling down from a previous reconcile @@ -352,7 +352,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { scaleIntent: map[string]int32{ "ms2": 1, // newMS (ms2) has a scaling down intent from current reconcile }, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms2", "v2", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), @@ -366,7 +366,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "do not scale down replicas when there are more replicas than minAvailable replicas, but not all the replicas are available (unavailability on newMS)", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), @@ -379,7 +379,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "do not scale down replicas when there are more replicas than minAvailable replicas, but not all the replicas are available (unavailability on oldMS)", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(2)), // only 2 replicas are available @@ -392,7 +392,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "scale down replicas when there are more replicas than minAvailable replicas, all replicas are available", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms2", "v2", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), oldMSs: []*clusterv1.MachineSet{ 
createMS("ms1", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), @@ -405,7 +405,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v0", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), @@ -423,7 +423,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, available replicas are scaled down when unavailable replicas are gone", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v0", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), @@ -441,7 +441,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, available replicas are scaled down when unavailable replicas are gone is not affected by replicas without machines", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v0", 4, withStatusReplicas(3), withStatusAvailableReplicas(3)), // 1 replica without machine @@ -459,7 +459,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, scale down stops before breaching minAvailable replicas", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v0", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), @@ -476,7 +476,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "scale down replicas when there are more replicas than minAvailable replicas, unavailable replicas are scaled down first, scale down stops before breaching minAvailable replicas is not affected by replicas without machines", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms4", "v3", 1, withStatusReplicas(1), withStatusAvailableReplicas(1)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v0", 2, withStatusReplicas(1), withStatusAvailableReplicas(1)), // 1 replica without machine @@ -494,7 +494,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { { name: "scale down replicas when there are more replicas than minAvailable replicas, scale down keeps into account scale downs from a previous reconcile", scaleIntent: map[string]int32{}, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms2", "v3", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), oldMSs: []*clusterv1.MachineSet{ 
createMS("ms1", "v0", 3, withStatusReplicas(4), withStatusAvailableReplicas(4)), // OldMS is scaling down from a previous reconcile @@ -509,7 +509,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { scaleIntent: map[string]int32{ "ms2": 1, // newMS (ms2) has a scaling down intent from current reconcile }, - md: createMD("v2", 3, 1, 0), + md: createMD("v2", 3, withRolloutStrategy(1, 0)), newMS: createMS("ms2", "v3", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v0", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), @@ -531,7 +531,7 @@ func Test_reconcileOldMachineSets(t *testing.T) { oldMSs: tt.oldMSs, scaleIntents: tt.scaleIntent, } - err := p.reconcileOldMachineSets(ctx) + err := p.reconcileOldMachineSetsRolloutRolling(ctx) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p.scaleIntents).To(Equal(tt.expectScaleIntent), "unexpected scaleIntents") diff --git a/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go b/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go index d4aeb9d754dd..bd42251386ec 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rollout_sequence_test.go @@ -40,7 +40,7 @@ import ( "sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil" ) -type rolloutSequenceTestCase struct { +type rolloutRollingSequenceTestCase struct { name string maxSurge int32 maxUnavailable int32 @@ -113,7 +113,7 @@ func Test_rolloutRollingSequences(t *testing.T) { ctx = ctrl.LoggerInto(ctx, klog.Background()) klog.SetOutput(ginkgo.GinkgoWriter) - tests := []rolloutSequenceTestCase{ + tests := []rolloutRollingSequenceTestCase{ // Regular rollout (no in-place) { // scale out by 1 @@ -163,7 +163,7 @@ func Test_rolloutRollingSequences(t *testing.T) { maxSurge: 3, maxUnavailable: 1, currentScope: &rolloutScope{ // Manually providing a scope simulating a MD originally with 6 replica in the middle of a rollout, with 3 machines already created in the newMS and 3 still on the oldMS, and then MD scaled up to 12. - machineDeployment: createMD("v2", 12, 3, 1), + machineDeployment: createMD("v2", 12, withRolloutStrategy(3, 1)), machineSets: []*clusterv1.MachineSet{ createMS("ms1", "v1", 3), createMS("ms2", "v2", 3), @@ -191,7 +191,7 @@ func Test_rolloutRollingSequences(t *testing.T) { maxSurge: 3, maxUnavailable: 1, currentScope: &rolloutScope{ // Manually providing a scope simulating a MD originally with 12 replica in the middle of a rollout, with 3 machines already created in the newMS and 9 still on the oldMS, and then MD scaled down to 6. - machineDeployment: createMD("v2", 6, 3, 1), + machineDeployment: createMD("v2", 6, withRolloutStrategy(3, 1)), machineSets: []*clusterv1.MachineSet{ createMS("ms1", "v1", 9), createMS("ms2", "v2", 3), @@ -225,7 +225,7 @@ func Test_rolloutRollingSequences(t *testing.T) { maxSurge: 3, maxUnavailable: 1, currentScope: &rolloutScope{ // Manually providing a scope simulating a MD with 6 replica in the middle of a rollout, with 3 machines already created in the newMS and 3 still on the oldMS, and then MD spec is changed. 
- machineDeployment: createMD("v3", 6, 3, 1), + machineDeployment: createMD("v3", 6, withRolloutStrategy(3, 1)), machineSets: []*clusterv1.MachineSet{ createMS("ms1", "v1", 3), createMS("ms2", "v2", 3), @@ -263,7 +263,7 @@ func Test_rolloutRollingSequences(t *testing.T) { tt.randomControllerOrder = false tt.logAndGoldenFileName = strings.ToLower(tt.name) t.Run("default", func(t *testing.T) { - runTestCase(ctx, t, tt) + runRolloutRollingTestCase(ctx, t, tt) }) } @@ -274,7 +274,7 @@ func Test_rolloutRollingSequences(t *testing.T) { tt.seed = 0 tt.logAndGoldenFileName = strings.ToLower(tt.name) t.Run("random(0)", func(t *testing.T) { - runTestCase(ctx, t, tt) + runRolloutRollingTestCase(ctx, t, tt) }) } @@ -286,7 +286,7 @@ func Test_rolloutRollingSequences(t *testing.T) { tt.randomControllerOrder = true tt.skipLogToFileAndGoldenFileCheck = true t.Run(fmt.Sprintf("random(%d)", tt.seed), func(t *testing.T) { - runTestCase(ctx, t, tt) + runRolloutRollingTestCase(ctx, t, tt) }) } } @@ -294,18 +294,18 @@ func Test_rolloutRollingSequences(t *testing.T) { } } -func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase) { +func runRolloutRollingTestCase(ctx context.Context, t *testing.T, tt rolloutRollingSequenceTestCase) { t.Helper() g := NewWithT(t) rng := rand.New(rand.NewSource(tt.seed)) //nolint:gosec // it is ok to use a weak randomizer here fLogger := newFileLogger(t, tt.name, fmt.Sprintf("testdata/%s", tt.logAndGoldenFileName)) - fLogger.writeGoldenFile = true + // uncomment this line to automatically generate/update golden files: fLogger.writeGoldenFile = true // Init current and desired state from test case current := tt.currentScope.Clone() if current == nil { - current = initCurrentRolloutScope(tt) + current = initCurrentRolloutScope(tt.currentMachineNames, withRolloutStrategy(tt.maxSurge, tt.maxUnavailable)) } desired := computeDesiredRolloutScope(current, tt.desiredMachineNames) @@ -319,12 +319,15 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase) i := 1 maxIterations := tt.maxIterations for { - taskOrder := defaultTaskOrder(current) + taskList := getTaskListRolloutRolling(current) + taskCount := len(taskList) + taskOrder := defaultTaskOrder(taskCount) if tt.randomControllerOrder { - taskOrder = randomTaskOrder(current, rng) + taskOrder = randomTaskOrder(taskCount, rng) } for _, taskID := range taskOrder { - if taskID == 0 { + task := taskList[taskID] + if task == "md" { fLogger.Logf("[MD controller] Iteration %d, Reconcile md", i) fLogger.Logf("[MD controller] - Input to rollout planner\n%s", current) @@ -334,8 +337,9 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase) p.newMS = current.newMS() p.oldMSs = current.oldMSs() - err := p.Plan(ctx) + err := p.planRolloutRolling(ctx) g.Expect(err).ToNot(HaveOccurred()) + // Apply changes. 
for _, ms := range current.machineSets { if scaleIntent, ok := p.scaleIntents[ms.Name]; ok { @@ -378,8 +382,8 @@ func runTestCase(ctx context.Context, t *testing.T, tt rolloutSequenceTestCase) // Run mutators faking other controllers for _, ms := range current.machineSets { - if fmt.Sprintf("ms%d", taskID) == ms.Name { - fLogger.Logf("[MS controller] Iteration %d, Reconcile ms%d, %s", i, taskID, msLog(ms, current.machineSetMachines[ms.Name])) + if ms.Name == task { + fLogger.Logf("[MS controller] Iteration %d, Reconcile %s, %s", i, ms.Name, msLog(ms, current.machineSetMachines[ms.Name])) machineSetControllerMutator(fLogger, ms, current) break } @@ -481,15 +485,14 @@ type rolloutScope struct { machineUID int32 } -// Init creates current state and desired state for rolling out a md from currentMachines to wantMachineNames. -func initCurrentRolloutScope(tt rolloutSequenceTestCase) (current *rolloutScope) { +func initCurrentRolloutScope(currentMachineNames []string, mdOptions ...machineDeploymentOption) (current *rolloutScope) { // create current state, with a MD with // - given MaxSurge, MaxUnavailable // - replica counters assuming all the machines are at stable state // - spec different from the MachineSets and Machines we are going to create down below (to simulate a change that triggers a rollout, but it is not yet started) - mdReplicaCount := int32(len(tt.currentMachineNames)) + mdReplicaCount := int32(len(currentMachineNames)) current = &rolloutScope{ - machineDeployment: createMD("v2", mdReplicaCount, tt.maxSurge, tt.maxUnavailable), + machineDeployment: createMD("v2", mdReplicaCount, mdOptions...), } // Create current MS, with @@ -502,13 +505,12 @@ func initCurrentRolloutScope(tt rolloutSequenceTestCase) (current *rolloutScope) // - spec at stable state (rollout is not yet propagated to machines) var totMachines int32 currentMachines := []*clusterv1.Machine{} - for _, machineSetMachineName := range tt.currentMachineNames { + for _, machineSetMachineName := range currentMachineNames { totMachines++ currentMachines = append(currentMachines, createM(machineSetMachineName, ms.Name, ms.Spec.Template.Spec.FailureDomain)) } current.machineSetMachines = map[string][]*clusterv1.Machine{} current.machineSetMachines[ms.Name] = currentMachines - current.machineUID = totMachines // TODO(in-place): this should be removed as soon as rolloutPlanner will take care of creating newMS @@ -830,20 +832,30 @@ func maxSurgeToleration() func(log *fileLogger, _ int, _ *rolloutScope, _, _ int } } +func getTaskListRolloutRolling(current *rolloutScope) []string { + taskList := make([]string, 0) + taskList = append(taskList, "md") + for _, ms := range current.machineSets { + taskList = append(taskList, ms.Name) + } + taskList = append(taskList, fmt.Sprintf("ms%d", len(current.machineSets)+1)) // r the MachineSet that might be created when reconciling md + return taskList +} + // default task order ensure the controllers are run in a consistent and predictable way: md, ms1, ms2 and so on. 
-func defaultTaskOrder(current *rolloutScope) []int { +func defaultTaskOrder(taskCount int) []int { taskOrder := []int{} - for t := range len(current.machineSets) + 1 + 1 { // +1 is for the MachineSet that might be created when reconciling md, +1 is for the md itself + for t := range taskCount { taskOrder = append(taskOrder, t) } return taskOrder } -func randomTaskOrder(current *rolloutScope, rng *rand.Rand) []int { +func randomTaskOrder(taskCount int, rng *rand.Rand) []int { u := &UniqueRand{ rng: rng, generated: map[int]bool{}, - max: len(current.machineSets) + 1 + 1, // +1 is for the MachineSet that might be created when reconciling md, +1 is for the md itself + max: taskCount, } taskOrder := []int{} for !u.Done() { @@ -859,28 +871,37 @@ func randomTaskOrder(current *rolloutScope, rng *rand.Rand) []int { return taskOrder } -func createMD(failureDomain string, replicas int32, maxSurge, maxUnavailable int32) *clusterv1.MachineDeployment { - return &clusterv1.MachineDeployment{ +type machineDeploymentOption func(md *clusterv1.MachineDeployment) + +func withRolloutStrategy(maxSurge, maxUnavailable int32) func(md *clusterv1.MachineDeployment) { + return func(md *clusterv1.MachineDeployment) { + md.Spec.Rollout.Strategy = clusterv1.MachineDeploymentRolloutStrategy{ + Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, + RollingUpdate: clusterv1.MachineDeploymentRolloutStrategyRollingUpdate{ + MaxSurge: ptr.To(intstr.FromInt32(maxSurge)), + MaxUnavailable: ptr.To(intstr.FromInt32(maxUnavailable)), + }, + } + } +} + +func createMD(failureDomain string, replicas int32, options ...machineDeploymentOption) *clusterv1.MachineDeployment { + md := &clusterv1.MachineDeployment{ ObjectMeta: metav1.ObjectMeta{Name: "md"}, Spec: clusterv1.MachineDeploymentSpec{ // Note: using failureDomain as a template field to determine upToDate Template: clusterv1.MachineTemplateSpec{Spec: clusterv1.MachineSpec{FailureDomain: failureDomain}}, Replicas: &replicas, - Rollout: clusterv1.MachineDeploymentRolloutSpec{ - Strategy: clusterv1.MachineDeploymentRolloutStrategy{ - Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, - RollingUpdate: clusterv1.MachineDeploymentRolloutStrategyRollingUpdate{ - MaxSurge: ptr.To(intstr.FromInt32(maxSurge)), - MaxUnavailable: ptr.To(intstr.FromInt32(maxUnavailable)), - }, - }, - }, }, Status: clusterv1.MachineDeploymentStatus{ Replicas: &replicas, AvailableReplicas: &replicas, }, } + for _, opt := range options { + opt(md) + } + return md } func withStatusAvailableReplicas(r int32) fakeMachineSetOption { From aca5de4b25344e5d649e1e6948a14a1f8c53c52c Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Mon, 6 Oct 2025 22:30:14 +0200 Subject: [PATCH 3/5] Address feedback --- .../machinedeployment_rolling.go | 143 +++++++++++++----- .../machinedeployment_rolling_test.go | 18 +-- 2 files changed, 110 insertions(+), 51 deletions(-) diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling.go b/internal/controllers/machinedeployment/machinedeployment_rolling.go index 6e75fe8df344..4a9b5f657219 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling.go @@ -100,7 +100,7 @@ func newRolloutPlanner() *rolloutPlanner { } } -// planRolloutRolling determine how to proceed with the rollout when using the RolloutRolling strategy if we are not yet at the desired state. 
+// planRolloutRolling determines how to proceed with the rollout when using the RolloutRolling strategy if the system is not yet at the desired state. func (p *rolloutPlanner) planRolloutRolling(ctx context.Context) error { if p.md.Spec.Replicas == nil { return errors.Errorf("spec.replicas for MachineDeployment %v is nil, this is unexpected", client.ObjectKeyFromObject(p.md)) } @@ -128,8 +128,18 @@ func (p *rolloutPlanner) planRolloutRolling(ctx context.Context) error { // This funcs tries to detect and address the case when a rollout is not making progress because both scaling down and scaling up are blocked. // Note. this func must be called after computing scale up/down intent for all the MachineSets. - // Note. this func only address deadlock due to unavailable machines not getting deleted on oldMSs, e.g. due to a wrong configuration. - // unblocking deadlock when unavailable machines exists only on oldMSs, is required also because failures on old machines set are not remediated by MHC. + // Note. this func only addresses deadlocks due to unavailable replicas not getting deleted on oldMSs, which can happen + // because reconcileOldMachineSetsRolloutRolling called above always assumes the worst case when deleting replicas, e.g. + // - MD with MaxSurge 1, MaxUnavailable 0 + // - OldMS with 3 replicas, 2 available replicas (and thus 1 unavailable replica) + // - NewMS with 1 replica, 1 available replica + // - In theory it is possible to scale down oldMS from 3->2 replicas by deleting the unavailable replica. + // - However, reconcileOldMachineSetsRolloutRolling cannot assume that the Machine controller is going to delete + // the unavailable replica when scaling down from 3->2, because it might happen that one of the available replicas + // is deleted instead. + // - As a consequence, reconcileOldMachineSetsRolloutRolling, which assumes the worst case when deleting replicas, did not scale down oldMS. + // This situation, a rollout not proceeding due to unavailable replicas, is considered a deadlock to be addressed by reconcileDeadlockBreaker. + // Note. unblocking deadlocks when unavailable replicas exist only on oldMSs is also required because replicas on oldMSs are not remediated by MHC. p.reconcileDeadlockBreaker(ctx) return nil } @@ -181,13 +191,13 @@ func (p *rolloutPlanner) reconcileOldMachineSetsRolloutRolling(ctx context.Conte totReplicas := mdutil.GetReplicaCountForMachineSets(allMSs) - // Find the number of available machines. + // Find the total number of available replicas. totAvailableReplicas := ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(allMSs), 0) // Find the number of pending scale down from previous reconcile/from current reconcile; - // This is required because whenever we are reducing the number of replicas, this operation could further impact availability e.g. - // - in case of regular rollout, there is no certainty about which machine is going to be deleted (and if this machine is currently available or not): - // - e.g. MS controller is going to delete first machines with deletion annotation; also MS controller has a slight different notion of unavailable as of now. + // This is required because whenever the system is reducing the number of replicas, this operation could further impact availability, e.g. + // - in case of regular rollout, there is no certainty about which replica is going to be deleted (and if the replica being deleted is currently available or not): + // - e.g.
MS controller is going to delete replicas with the deletion annotation first; also MS controller has a slightly different notion of unavailable as of now. + // - in case of in-place rollout, in-place upgrades are always assumed as impacting availability (they can always fail). totPendingScaleDown := int32(0) for _, ms := range allMSs { @@ -196,7 +206,7 @@ func (p *rolloutPlanner) reconcileOldMachineSetsRolloutRolling(ctx context.Conte scaleIntent = min(scaleIntent, v) } - // NOTE: we are counting only pending scale down from the current status.replicas (so scale down of actual machines). + // NOTE: Count only pending scale down from the current status.replicas (so scale down of existing replicas). if scaleIntent < ptr.Deref(ms.Status.Replicas, 0) { totPendingScaleDown += max(ptr.Deref(ms.Status.Replicas, 0)-scaleIntent, 0) } @@ -204,30 +214,45 @@ func (p *rolloutPlanner) reconcileOldMachineSetsRolloutRolling(ctx context.Conte // Compute the total number of replicas that can be scaled down. // Exit immediately if there is no room for scaling down. + // NOTE: this is a quick preliminary check to verify if there is room for scaling down any of the oldMSs; further down the code + // will make additional checks to ensure scale down actually happens without breaching MaxUnavailable, and + // if necessary, it will reduce the extent of the scale down accordingly. totalScaleDownCount := max(totReplicas-totPendingScaleDown-minAvailable, 0) if totalScaleDownCount <= 0 { return nil } + // Sort oldMSs so the system will start deleting from the oldest MachineSets first. sort.Sort(mdutil.MachineSetsByCreationTimestamp(p.oldMSs)) // Scale down only unavailable replicas / up to residual totalScaleDownCount. - // NOTE: we are scaling up unavailable machines first in order to increase chances for the rollout to progress; - // however, the MS controller might have different opinion on which machines to scale down. - // As a consequence, the scale down operation must continuously assess if reducing the number of replicas - // for an older MS could further impact availability under the assumption than any scale down could further impact availability (same as above). - // if reducing the number of replicase might lead to breaching minAvailable, scale down extent must be limited accordingly. - totalScaleDownCount, totAvailableReplicas = p.scaleDownOldMSs(ctx, totalScaleDownCount, totAvailableReplicas, minAvailable, false) - - // Then scale down old MS up to zero replicas / up to residual totalScaleDownCount. - // NOTE: also in this case, continuously assess if reducing the number of replicase could further impact availability, - // and if necessary, limit scale down extent to ensure the operation respects minAvailable limits. - _, _ = p.scaleDownOldMSs(ctx, totalScaleDownCount, totAvailableReplicas, minAvailable, true) + // NOTE: The system must scale down unavailable replicas first in order to increase chances for the rollout to progress; + // however, the rollout planner must also take into account the fact that the MS controller might have a different opinion on + // which replica to delete when a scale down happens. + // + // As a consequence, the rollout planner cannot assume a scale down operation deletes an unavailable replica, e.g. + // - MD with MaxSurge 1, MaxUnavailable 0 + // - OldMS with 3 replicas, 2 available replicas (and thus 1 unavailable replica) + // - NewMS with 1 replica, 1 available replica + // - In theory it is possible to scale down oldMS from 3->2 replicas by deleting the unavailable replica.
+ // - However, rollout planner cannot assume that the Machine controller is going to delete + // the unavailable replica when scaling down from 3->2, because it might happen that one of the available replicas + // is deleted instead. + // + // In the example above, the scaleDownOldMSs should not scale down OldMS from 3->2 replicas. + // In other use cases, e.g. when scaling down an oldMS from 5->3 replicas, the scaleDownOldMSs should limit scale down extent + // only partially if this doesn't breach MaxUnavailable (e.g. scale down 5->4 instead of 5->3). + totalScaleDownCount, totAvailableReplicas = p.scaleDownOldMSs(ctx, totalScaleDownCount, totAvailableReplicas, minAvailable, true) + + // Then scale down old MS down to zero replicas / down to residual totalScaleDownCount. + // NOTE: also in this case, should continuously assess if reducing the number of replicase could further impact availability, + // and if necessary, limit scale down extent to ensure the operation respects MaxUnavailable limits. + _, _ = p.scaleDownOldMSs(ctx, totalScaleDownCount, totAvailableReplicas, minAvailable, false) return nil } -func (p *rolloutPlanner) scaleDownOldMSs(ctx context.Context, totalScaleDownCount, totAvailableReplicas, minAvailable int32, scaleToZero bool) (int32, int32) { +func (p *rolloutPlanner) scaleDownOldMSs(ctx context.Context, totalScaleDownCount, totAvailableReplicas, minAvailable int32, scaleDownOnlyUnavailableReplicas bool) (int32, int32) { log := ctrl.LoggerFrom(ctx) for _, oldMS := range p.oldMSs { @@ -237,46 +262,80 @@ func (p *rolloutPlanner) scaleDownOldMSs(ctx context.Context, totalScaleDownCoun } // No op if this MS has been already scaled down to zero. - scaleIntent := ptr.Deref(oldMS.Spec.Replicas, 0) + replicas := ptr.Deref(oldMS.Spec.Replicas, 0) if v, ok := p.scaleIntents[oldMS.Name]; ok { - scaleIntent = min(scaleIntent, v) + replicas = min(replicas, v) } - if scaleIntent <= 0 { + if replicas <= 0 { continue } - // Compute the scale down extent by considering either unavailable replicas or, if scaleToZero is set, all replicas. + // Compute the scale down extent by considering either all replicas or, if scaleDownOnlyUnavailableReplicas is set, unavailable replicas only. // In both cases, scale down is limited to totalScaleDownCount. - maxScaleDown := max(scaleIntent-ptr.Deref(oldMS.Status.AvailableReplicas, 0), 0) - if scaleToZero { - maxScaleDown = scaleIntent + // Exit if there is no room for scaling down the MS. + // NOTE: this is a preliminary check to verify if there is room for scaling down this specific MS; further down the code + // will make additional checks to ensure scale down actually happens without breaching MaxUnavailable, and + // if necessary, it will reduce the extent of the scale down accordingly. + maxScaleDown := replicas + if scaleDownOnlyUnavailableReplicas { + maxScaleDown = max(replicas-ptr.Deref(oldMS.Status.AvailableReplicas, 0), 0) } scaleDown := min(maxScaleDown, totalScaleDownCount) - - // Exit if there is no room for scaling down the MS. if scaleDown == 0 { continue } // Before scaling down validate if the operation will lead to a breach to minAvailability - // In order to do so, consider how many machines will be actually deleted, and consider this operation as impacting availability; - // if the projected state breaches minAvailability, reduce the scale down extend accordingly. 
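// Illustrative sketch (hypothetical helper, not the actual implementation): the essence of the
// worst-case check described in the NOTE above. Because the MS controller might delete an available
// replica first, a scale down is only safe if losing that many existing replicas still keeps the
// deployment at or above minAvailable.
func wouldBreachMinAvailable(totAvailableReplicas, minAvailable, statusReplicas, newSpecReplicas int32) bool {
	// Existing replicas that this scale down would actually delete.
	existingReplicasToBeDeleted := max(statusReplicas-newSpecReplicas, 0)
	// Worst case: every deleted replica is an available one.
	return totAvailableReplicas-existingReplicasToBeDeleted < minAvailable
}
// For the example above (MD spec.replicas 3, MaxUnavailable 0 => minAvailable 3; oldMS with
// status.replicas 3 and 2 available, newMS contributing 1 available replica => totAvailableReplicas 3):
// wouldBreachMinAvailable(3, 3, 3, 2) is true, so the oldMS must not be scaled down from 3 to 2.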
- availableMachineScaleDown := int32(0) - if ptr.Deref(oldMS.Status.AvailableReplicas, 0) > 0 { - newScaleIntent := scaleIntent - scaleDown - machineScaleDownIntent := max(ptr.Deref(oldMS.Status.Replicas, 0)-newScaleIntent, 0) - if totAvailableReplicas-machineScaleDownIntent < minAvailable { - availableMachineScaleDown = max(totAvailableReplicas-minAvailable, 0) - scaleDown = scaleDown - machineScaleDownIntent + availableMachineScaleDown + // In order to do so, consider how many exiting replicas will be actually deleted, and consider + // this operation as impacting availability because there are no guarantees that the MS controller is going to + // delete unavailable replicas first; if the projected state breaches minAvailability, reduce the scale down extend accordingly. - totAvailableReplicas = max(totAvailableReplicas-availableMachineScaleDown, 0) + // If there are no available replicas on this MS, scale down won't impact totAvailableReplicas at all. + if ptr.Deref(oldMS.Status.AvailableReplicas, 0) > 0 { + // If instead there AvailableReplicas on this MS: + // compute the new spec.replicas assuming we are going to use the entire scale down extent. + newReplicas := max(replicas-scaleDown, 0) + + // compute how many existing replicas the operation is going to delete: + // e.g. if MS is scaling down spec.replicas from 5 to 3, but status.replicas is 4, it is scaling down 1 existing replica. + // e.g. if MS is scaling down spec.replicas from 5 to 3, but status.replicas is 6, it is scaling down 3 existing replicas. + existingReplicasToBeDeleted := max(ptr.Deref(oldMS.Status.Replicas, 0)-newReplicas, 0) + + // If we are deleting at least one existing replicas + if existingReplicasToBeDeleted > 0 { + // Check if we are scaling down more existing replica than what is allowed by MaxUnavailability. + if totAvailableReplicas-minAvailable < existingReplicasToBeDeleted { + // If we are scaling down more existing replica than what is allowed by MaxUnavailability, then + // rollout planner must revisit the scale down extent. + + // Determine how many replicas can be deleted overall without further impacting availability. + maxExistingReplicasThatCanBeDeleted := max(totAvailableReplicas-minAvailable, 0) + + // Compute the revisited new spec.replicas: + // e.g. MS spec.replicas 20, scale down 8, newReplicas 12 (20-8), status.replicas 15 -> existingReplicasToBeDeleted 3 (15-12) + // assuming that maxExistingReplicasThatCanBeDeleted is 2, newScaleIntentRevisited should be 15-2 = 13 + // e.g. MS spec.replicas 16, scale down 3, newReplicas 13 (16-3), status.replicas 21 -> existingReplicasToBeDeleted 8 (21-13) + // assuming that maxExistingReplicasThatCanBeDeleted is 7, newScaleIntentRevisited should be 21-7 = 14 + // NOTE: there is a safeguard preventing to go above the initial replicas number (this is scale down oldMS). + newReplicasRevisited := min(ptr.Deref(oldMS.Status.Replicas, 0)-maxExistingReplicasThatCanBeDeleted, replicas) + + // Re-compute the scale down extent by using newReplicasRevisited. + scaleDown = max(replicas-newReplicasRevisited, 0) + + // Re-compute how many existing replicas the operation is going to delete by using newReplicasRevisited. + existingReplicasToBeDeleted = max(ptr.Deref(oldMS.Status.Replicas, 0)-newReplicasRevisited, 0) + } + + // keep track that we are deleting existing replicas by assuming that this operation + // will reduce totAvailableReplicas (worst scenario, deletion of available machines happen first). 
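// Illustrative example (hypothetical numbers): if this oldMS has status.replicas 5 with only
// 2 of them available and the scale down deletes 4 existing replicas, at most 2 of the deleted
// replicas can be available ones, so totAvailableReplicas is reduced by min(2, 4) = 2.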
+ totAvailableReplicas -= min(ptr.Deref(oldMS.Status.AvailableReplicas, 0), existingReplicasToBeDeleted) } } if scaleDown > 0 { - newScaleIntent := max(scaleIntent-scaleDown, 0) - log.V(5).Info(fmt.Sprintf("Setting scale down intent for %s to %d replicas (-%d)", oldMS.Name, newScaleIntent, scaleDown), "machineset", client.ObjectKeyFromObject(oldMS).String()) + newScaleIntent := max(replicas-scaleDown, 0) + log.V(5).Info(fmt.Sprintf("Setting scale down intent for %s to %d replicas (-%d)", oldMS.Name, newScaleIntent, scaleDown), "MachineSet", klog.KObj(oldMS)) p.scaleIntents[oldMS.Name] = newScaleIntent totalScaleDownCount = max(totalScaleDownCount-scaleDown, 0) } @@ -336,7 +395,7 @@ func (p *rolloutPlanner) reconcileDeadlockBreaker(ctx context.Context) { } newScaleIntent := max(ptr.Deref(oldMS.Spec.Replicas, 0)-1, 0) - log.Info(fmt.Sprintf("Setting scale down intent for %s to %d replicas (-%d) to unblock rollout stuck due to unavailable machine on oldMS only", oldMS.Name, newScaleIntent, 1), "machineset", client.ObjectKeyFromObject(oldMS).String()) + log.Info(fmt.Sprintf("Setting scale down intent for %s to %d replicas (-%d) to unblock rollout stuck due to unavailable machine on oldMS only", oldMS.Name, newScaleIntent, 1), "MachineSet", klog.KObj(oldMS)) p.scaleIntents[oldMS.Name] = newScaleIntent return } diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go index 85544da4574d..98c6f28336f9 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go @@ -334,7 +334,7 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { }, }, { - name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from a previous reconcile already takes the availability buffer, scale down from a previous reconcile on another MSr", + name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from a previous reconcile already takes the availability buffer, scale down from a previous reconcile on another MS", scaleIntent: map[string]int32{}, md: createMD("v2", 6, withRolloutStrategy(3, 1)), newMS: createMS("ms3", "v2", 3, withStatusReplicas(0), withStatusAvailableReplicas(0)), // NewMS is scaling up from previous reconcile, but replicas do not exists yet @@ -573,7 +573,7 @@ func Test_reconcileDeadlockBreaker(t *testing.T) { scaleIntent: map[string]int32{}, newMS: createMS("ms2", "v2", 10, withStatusReplicas(10), withStatusAvailableReplicas(10)), oldMSs: []*clusterv1.MachineSet{ - createMS("ms1", "v1", 0, withStatusReplicas(0), withStatusAvailableReplicas(0)), + createMS("ms1", "v1", 0, withStatusReplicas(0), withStatusAvailableReplicas(0)), // there no replicas, not a deadlock, we are actually at desired state }, expectScaleIntent: map[string]int32{ // no new scale down intent for oldMSs (ms1): @@ -584,7 +584,7 @@ func Test_reconcileDeadlockBreaker(t *testing.T) { scaleIntent: map[string]int32{}, newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), oldMSs: []*clusterv1.MachineSet{ - createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), // there no unavailable replicas, not a deadlock (rollout will continue as usual) }, expectScaleIntent: map[string]int32{ // no new scale down intent for 
oldMSs (ms1): @@ -595,7 +595,7 @@ func Test_reconcileDeadlockBreaker(t *testing.T) { scaleIntent: map[string]int32{}, newMS: createMS("ms2", "v2", 6, withStatusReplicas(5), withStatusAvailableReplicas(5)), // scale up from a previous reconcile oldMSs: []*clusterv1.MachineSet{ - createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // one unavailable replica, potential deadlock + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // there is at least one unavailable replica, not yet considered deadlock because scale operation still in progress }, expectScaleIntent: map[string]int32{ // no new scale down intent for oldMSs (ms1): @@ -606,7 +606,7 @@ func Test_reconcileDeadlockBreaker(t *testing.T) { scaleIntent: map[string]int32{}, newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), oldMSs: []*clusterv1.MachineSet{ - createMS("ms1", "v1", 4, withStatusReplicas(5), withStatusAvailableReplicas(4)), // one unavailable replica, potential deadlock, scale down from a previous reconcile + createMS("ms1", "v1", 4, withStatusReplicas(5), withStatusAvailableReplicas(4)), // there is at least one unavailable replica, not yet considered deadlock because scale operation still in progress }, expectScaleIntent: map[string]int32{ // no new scale down intent for oldMSs (ms1): @@ -619,7 +619,7 @@ func Test_reconcileDeadlockBreaker(t *testing.T) { }, newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), oldMSs: []*clusterv1.MachineSet{ - createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // one unavailable replica, potential deadlock + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // there is at least one unavailable replica, not yet considered deadlock because scale operation still in progress }, expectScaleIntent: map[string]int32{ "ms2": 6, @@ -633,7 +633,7 @@ func Test_reconcileDeadlockBreaker(t *testing.T) { }, newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), oldMSs: []*clusterv1.MachineSet{ - createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // one unavailable replica, potential deadlock + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // there is at least one unavailable replica, not yet considered deadlock because scale operation still in progress }, expectScaleIntent: map[string]int32{ // no new scale down intent for oldMSs (ms1): @@ -645,7 +645,7 @@ func Test_reconcileDeadlockBreaker(t *testing.T) { scaleIntent: map[string]int32{}, newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(3)), // one unavailable replica oldMSs: []*clusterv1.MachineSet{ - createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // one unavailable replica, potential deadlock + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(4)), // there is at least one unavailable replica, no scale operations in progress, potential deadlock, but the system must wait until all replica on newMS are available before unblocking further deletion on oldMS. 
}, expectScaleIntent: map[string]int32{ // no new scale down intent for oldMSs (ms1): @@ -656,7 +656,7 @@ func Test_reconcileDeadlockBreaker(t *testing.T) { scaleIntent: map[string]int32{}, newMS: createMS("ms2", "v2", 5, withStatusReplicas(5), withStatusAvailableReplicas(5)), oldMSs: []*clusterv1.MachineSet{ - createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(3)), // one unavailable replica, potential deadlock + createMS("ms1", "v1", 5, withStatusReplicas(5), withStatusAvailableReplicas(3)), // there is at least one unavailable replica, all replicas on newMS available, no scale operations in progress, deadlock }, expectScaleIntent: map[string]int32{ // new scale down intent for oldMSs (ms1): From 7e8ac3ac5ff51e009cf4d8875d468e927a8126aa Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Tue, 7 Oct 2025 11:53:19 +0200 Subject: [PATCH 4/5] Fix review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stefan Büringer buringerst@vmware.com --- .../machinedeployment_rolling.go | 114 +++++++++--------- .../machinedeployment_rolling_test.go | 36 +++--- 2 files changed, 75 insertions(+), 75 deletions(-) diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling.go b/internal/controllers/machinedeployment/machinedeployment_rolling.go index 4a9b5f657219..cb01586c13e7 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling.go @@ -126,20 +126,20 @@ func (p *rolloutPlanner) planRolloutRolling(ctx context.Context) error { return err } - // This funcs tries to detect and address the case when a rollout is not making progress because both scaling down and scaling up are blocked. - // Note. this func must be called after computing scale up/down intent for all the MachineSets. - // Note. this func only address deadlock due to unavailable replicas not getting deleted on oldMSs, which can happen + // This func tries to detect and address the case when a rollout is not making progress because both scaling down and scaling up are blocked. + // Note: This func must be called after computing scale up/down intent for all the MachineSets. + // Note: This func only addresses deadlocks due to unavailable replicas not getting deleted on oldMSs, which can happen // because reconcileOldMachineSetsRolloutRolling called above always assumes the worst case when deleting replicas e.g. - // - MD with MaxSurge 1, MaxUnavailable 0 + // - MD with spec.replicas 3, MaxSurge 1, MaxUnavailable 0 // - OldMS with 3 replicas, 2 available replica (and thus 1 unavailable replica) // - NewMS with 1 replica, 1 available replica // - In theory it is possible to scale down oldMS from 3->2 replicas by deleting the unavailable replica. - // - However, reconcileOldMachineSetsRolloutRolling cannot assume that the Machine controller is going to delete + // - However, reconcileOldMachineSetsRolloutRolling cannot assume that the MachineSet controller is going to delete // the unavailable replica when scaling down from 3->2, because it might happen that one of the available replicas // is deleted instead. - // - As consequence reconcileOldMachineSetsRolloutRolling, which assumes the worst case when deleting replicas, did not scaled down oldMS. + // - As a consequence reconcileOldMachineSetsRolloutRolling, which assumes the worst case when deleting replicas, did not scaled down oldMS. 
// This situation, rollout not proceeding due to unavailable replicas, is considered a deadlock to be addressed by reconcileDeadlockBreaker. - // Note. unblocking deadlock when unavailable replicas exists only on oldMSs, is required also because replicas on oldMSs are not remediated by MHC. + // Note: Unblocking deadlocks when unavailable replicas exist only on oldMSs, is required also because replicas on oldMSs are not remediated by MHC. p.reconcileDeadlockBreaker(ctx) return nil } @@ -189,17 +189,17 @@ func (p *rolloutPlanner) reconcileOldMachineSetsRolloutRolling(ctx context.Conte maxUnavailable := mdutil.MaxUnavailable(*p.md) minAvailable := max(ptr.Deref(p.md.Spec.Replicas, 0)-maxUnavailable, 0) - totReplicas := mdutil.GetReplicaCountForMachineSets(allMSs) + totalSpecReplicas := mdutil.GetReplicaCountForMachineSets(allMSs) // Find the total number of available replicas. - totAvailableReplicas := ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(allMSs), 0) + totalAvailableReplicas := ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(allMSs), 0) - // Find the number of pending scale down from previous reconcile/from current reconcile; + // Find the number of pending scale down from previous reconcile/from current reconcile. // This is required because whenever the system is reducing the number of replicas, this operation could further impact availability e.g. - // - in case of regular rollout, there is no certainty about which replica is going to be deleted (and if the replica being deleted is currently available or not): - // - e.g. MS controller is going to delete first replicas with deletion annotation; also MS controller has a slight different notion of unavailable as of now. - // - in case of in-place rollout, in-place upgrade are always assumed as impacting availability (they can always fail). - totPendingScaleDown := int32(0) + // - in case of regular rollouts, there is no certainty about which replica is going to be deleted (and if the replica being deleted is currently available or not): + // - e.g. MS controller is going to first delete replicas with deletion annotation; also MS controller has a slightly different notion of unavailable as of now. + // - in case of in-place rollout, in-place updates are always assumed as impacting availability (they can always fail). + totalPendingScaleDown := int32(0) for _, ms := range allMSs { scaleIntent := ptr.Deref(ms.Spec.Replicas, 0) if v, ok := p.scaleIntents[ms.Name]; ok { @@ -208,7 +208,7 @@ func (p *rolloutPlanner) reconcileOldMachineSetsRolloutRolling(ctx context.Conte // NOTE: Count only pending scale down from the current status.replicas (so scale down of existing replicas). if scaleIntent < ptr.Deref(ms.Status.Replicas, 0) { - totPendingScaleDown += max(ptr.Deref(ms.Status.Replicas, 0)-scaleIntent, 0) + totalPendingScaleDown += max(ptr.Deref(ms.Status.Replicas, 0)-scaleIntent, 0) } } @@ -217,42 +217,42 @@ func (p *rolloutPlanner) reconcileOldMachineSetsRolloutRolling(ctx context.Conte // NOTE: this is a quick preliminary check to verify if there is room for scaling down any of the oldMSs; further down the code // will make additional checks to ensure scale down actually happens without breaching MaxUnavailable, and // if necessary, it will reduce the extent of the scale down accordingly. 
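// Illustrative example (hypothetical numbers): MD spec.replicas 6 and MaxUnavailable 1 give
// minAvailable 5; with an oldMS at spec.replicas 4 and a newMS at spec.replicas 3 (totalSpecReplicas 7),
// and one replica still being removed from a previous reconcile (totalPendingScaleDown 1),
// totalScaleDownCount = max(7-1-5, 0) = 1, so at most one more replica may be removed overall.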
- totalScaleDownCount := max(totReplicas-totPendingScaleDown-minAvailable, 0) + totalScaleDownCount := max(totalSpecReplicas-totalPendingScaleDown-minAvailable, 0) if totalScaleDownCount <= 0 { return nil } - // Sort oldMSs so the system will start deleting from the oldest MSs set first. + // Sort oldMSs so the system will start deleting from the oldest MS first. sort.Sort(mdutil.MachineSetsByCreationTimestamp(p.oldMSs)) // Scale down only unavailable replicas / up to residual totalScaleDownCount. - // NOTE: The system must scale down unavailable replicas first in order to increase chances for the rollout to progress; - // however, rollout planner must also take into account the fact that the MS controller might have different opinion on + // NOTE: The system must scale down unavailable replicas first in order to increase chances for the rollout to progress. + // However, rollout planner must also take into account the fact that the MS controller might have a different opinion on // which replica to delete when a scale down happens. // // As a consequence, the rollout planner cannot assume a scale down operation deletes an unavailable replica. e.g. - // - MD with MaxSurge 1, MaxUnavailable 0 + // - MD with spec.replicas 3, MaxSurge 1, MaxUnavailable 0 // - OldMS with 3 replicas, 2 available replica (and thus 1 unavailable replica) // - NewMS with 1 replica, 1 available replica // - In theory it is possible to scale down oldMS from 3->2 replicas by deleting the unavailable replica. - // - However, rollout planner cannot assume that the Machine controller is going to delete + // - However, rollout planner cannot assume that the MachineSet controller is going to delete // the unavailable replica when scaling down from 3->2, because it might happen that one of the available replicas // is deleted instead. // // In the example above, the scaleDownOldMSs should not scale down OldMS from 3->2 replicas. // In other use cases, e.g. when scaling down an oldMS from 5->3 replicas, the scaleDownOldMSs should limit scale down extent // only partially if this doesn't breach MaxUnavailable (e.g. scale down 5->4 instead of 5->3). - totalScaleDownCount, totAvailableReplicas = p.scaleDownOldMSs(ctx, totalScaleDownCount, totAvailableReplicas, minAvailable, true) + totalScaleDownCount, totalAvailableReplicas = p.scaleDownOldMSs(ctx, totalScaleDownCount, totalAvailableReplicas, minAvailable, true) // Then scale down old MS down to zero replicas / down to residual totalScaleDownCount. - // NOTE: also in this case, should continuously assess if reducing the number of replicase could further impact availability, + // NOTE: Also in this case, we should continuously assess if reducing the number of replicase could further impact availability, // and if necessary, limit scale down extent to ensure the operation respects MaxUnavailable limits. 
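// Illustrative example (hypothetical numbers): with a residual totalScaleDownCount of 3, an oldMS
// with 2 replicas none of which is available is scaled down to 0 in the first pass without touching
// availability; only then can the second pass take at most one replica from a fully available oldMS,
// and only if doing so still keeps the deployment at or above minAvailable.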
- _, _ = p.scaleDownOldMSs(ctx, totalScaleDownCount, totAvailableReplicas, minAvailable, false) + _, _ = p.scaleDownOldMSs(ctx, totalScaleDownCount, totalAvailableReplicas, minAvailable, false) return nil } -func (p *rolloutPlanner) scaleDownOldMSs(ctx context.Context, totalScaleDownCount, totAvailableReplicas, minAvailable int32, scaleDownOnlyUnavailableReplicas bool) (int32, int32) { +func (p *rolloutPlanner) scaleDownOldMSs(ctx context.Context, totalScaleDownCount, totalAvailableReplicas, minAvailable int32, scaleDownOnlyUnavailableReplicas bool) (int32, int32) { log := ctrl.LoggerFrom(ctx) for _, oldMS := range p.oldMSs { @@ -261,12 +261,12 @@ func (p *rolloutPlanner) scaleDownOldMSs(ctx context.Context, totalScaleDownCoun break } - // No op if this MS has been already scaled down to zero. replicas := ptr.Deref(oldMS.Spec.Replicas, 0) if v, ok := p.scaleIntents[oldMS.Name]; ok { replicas = min(replicas, v) } + // No op if this MS has been already scaled down to zero. if replicas <= 0 { continue } @@ -286,68 +286,68 @@ func (p *rolloutPlanner) scaleDownOldMSs(ctx context.Context, totalScaleDownCoun continue } - // Before scaling down validate if the operation will lead to a breach to minAvailability - // In order to do so, consider how many exiting replicas will be actually deleted, and consider + // Before scaling down validate if the operation will lead to a breach of minAvailability. + // In order to do so, consider how many existing replicas will be actually deleted, and consider // this operation as impacting availability because there are no guarantees that the MS controller is going to // delete unavailable replicas first; if the projected state breaches minAvailability, reduce the scale down extend accordingly. - // If there are no available replicas on this MS, scale down won't impact totAvailableReplicas at all. + // If there are no available replicas on this MS, scale down won't impact totalAvailableReplicas at all. if ptr.Deref(oldMS.Status.AvailableReplicas, 0) > 0 { - // If instead there AvailableReplicas on this MS: + // If instead there are AvailableReplicas on this MS: // compute the new spec.replicas assuming we are going to use the entire scale down extent. - newReplicas := max(replicas-scaleDown, 0) + newSpecReplicas := max(replicas-scaleDown, 0) // compute how many existing replicas the operation is going to delete: // e.g. if MS is scaling down spec.replicas from 5 to 3, but status.replicas is 4, it is scaling down 1 existing replica. // e.g. if MS is scaling down spec.replicas from 5 to 3, but status.replicas is 6, it is scaling down 3 existing replicas. - existingReplicasToBeDeleted := max(ptr.Deref(oldMS.Status.Replicas, 0)-newReplicas, 0) + existingReplicasToBeDeleted := max(ptr.Deref(oldMS.Status.Replicas, 0)-newSpecReplicas, 0) // If we are deleting at least one existing replicas if existingReplicasToBeDeleted > 0 { // Check if we are scaling down more existing replica than what is allowed by MaxUnavailability. - if totAvailableReplicas-minAvailable < existingReplicasToBeDeleted { + if totalAvailableReplicas-minAvailable < existingReplicasToBeDeleted { // If we are scaling down more existing replica than what is allowed by MaxUnavailability, then // rollout planner must revisit the scale down extent. // Determine how many replicas can be deleted overall without further impacting availability. 
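// Illustrative sketch (hypothetical helper, not the actual implementation): how the scale down
// extent gets revisited when the originally intended scale down would delete more existing replicas
// than the MaxUnavailable budget allows. It mirrors the computation in the lines that follow, and the
// worked numbers match the e.g. comments below.
func revisitScaleDown(specReplicas, statusReplicas, maxExistingReplicasThatCanBeDeleted int32) (scaleDown, existingReplicasToBeDeleted int32) {
	// Never go above the current spec.replicas (this is a scale down of an oldMS).
	newSpecReplicasRevisited := min(statusReplicas-maxExistingReplicasThatCanBeDeleted, specReplicas)
	scaleDown = max(specReplicas-newSpecReplicasRevisited, 0)
	existingReplicasToBeDeleted = max(statusReplicas-newSpecReplicasRevisited, 0)
	return scaleDown, existingReplicasToBeDeleted
}
// e.g. revisitScaleDown(20, 15, 2) revisits spec.replicas to 13, i.e. scaleDown 7 with only
// 2 existing replicas deleted; revisitScaleDown(16, 21, 7) revisits spec.replicas to 14.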
- maxExistingReplicasThatCanBeDeleted := max(totAvailableReplicas-minAvailable, 0) + maxExistingReplicasThatCanBeDeleted := max(totalAvailableReplicas-minAvailable, 0) // Compute the revisited new spec.replicas: - // e.g. MS spec.replicas 20, scale down 8, newReplicas 12 (20-8), status.replicas 15 -> existingReplicasToBeDeleted 3 (15-12) - // assuming that maxExistingReplicasThatCanBeDeleted is 2, newScaleIntentRevisited should be 15-2 = 13 - // e.g. MS spec.replicas 16, scale down 3, newReplicas 13 (16-3), status.replicas 21 -> existingReplicasToBeDeleted 8 (21-13) - // assuming that maxExistingReplicasThatCanBeDeleted is 7, newScaleIntentRevisited should be 21-7 = 14 + // e.g. MS spec.replicas 20, scale down 8, newSpecReplicas 12 (20-8), status.replicas 15 -> existingReplicasToBeDeleted 3 (15-12) + // assuming that maxExistingReplicasThatCanBeDeleted is 2, newSpecReplicasRevisited should be 15-2 = 13 + // e.g. MS spec.replicas 16, scale down 3, newSpecReplicas 13 (16-3), status.replicas 21 -> existingReplicasToBeDeleted 8 (21-13) + // assuming that maxExistingReplicasThatCanBeDeleted is 7, newSpecReplicasRevisited should be 21-7 = 14 // NOTE: there is a safeguard preventing to go above the initial replicas number (this is scale down oldMS). - newReplicasRevisited := min(ptr.Deref(oldMS.Status.Replicas, 0)-maxExistingReplicasThatCanBeDeleted, replicas) + newSpecReplicasRevisited := min(ptr.Deref(oldMS.Status.Replicas, 0)-maxExistingReplicasThatCanBeDeleted, replicas) - // Re-compute the scale down extent by using newReplicasRevisited. - scaleDown = max(replicas-newReplicasRevisited, 0) + // Re-compute the scale down extent by using newSpecReplicasRevisited. + scaleDown = max(replicas-newSpecReplicasRevisited, 0) - // Re-compute how many existing replicas the operation is going to delete by using newReplicasRevisited. - existingReplicasToBeDeleted = max(ptr.Deref(oldMS.Status.Replicas, 0)-newReplicasRevisited, 0) + // Re-compute how many existing replicas the operation is going to delete by using newSpecReplicasRevisited. + existingReplicasToBeDeleted = max(ptr.Deref(oldMS.Status.Replicas, 0)-newSpecReplicasRevisited, 0) } // keep track that we are deleting existing replicas by assuming that this operation - // will reduce totAvailableReplicas (worst scenario, deletion of available machines happen first). - totAvailableReplicas -= min(ptr.Deref(oldMS.Status.AvailableReplicas, 0), existingReplicasToBeDeleted) + // will reduce totalAvailableReplicas (worst scenario, deletion of available machines happen first). + totalAvailableReplicas -= min(ptr.Deref(oldMS.Status.AvailableReplicas, 0), existingReplicasToBeDeleted) } } if scaleDown > 0 { newScaleIntent := max(replicas-scaleDown, 0) - log.V(5).Info(fmt.Sprintf("Setting scale down intent for %s to %d replicas (-%d)", oldMS.Name, newScaleIntent, scaleDown), "MachineSet", klog.KObj(oldMS)) + log.V(5).Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d)", oldMS.Name, newScaleIntent, scaleDown), "MachineSet", klog.KObj(oldMS)) p.scaleIntents[oldMS.Name] = newScaleIntent totalScaleDownCount = max(totalScaleDownCount-scaleDown, 0) } } - return totalScaleDownCount, totAvailableReplicas + return totalScaleDownCount, totalAvailableReplicas } // This funcs tries to detect and address the case when a rollout is not making progress because both scaling down and scaling up are blocked. -// Note. this func must be called after computing scale up/down intent for all the MachineSets. -// Note. 
this func only address deadlock due to unavailable machines not getting deleted on oldMSs, e.g. due to a wrong configuration. -// unblocking deadlock when unavailable machines exists only on oldMSs, is required also because failures on old machines set are not remediated by MHC. +// Note: This func must be called after computing scale up/down intent for all the MachineSets. +// Note: This func only address deadlock due to unavailable machines not getting deleted on oldMSs, e.g. due to a wrong configuration. +// Note: Unblocking deadlocks when unavailable replicas exist only on oldMSs, is required also because replicas on oldMSs are not remediated by MHC. func (p *rolloutPlanner) reconcileDeadlockBreaker(ctx context.Context) { log := ctrl.LoggerFrom(ctx) allMSs := append(p.oldMSs, p.newMS) @@ -362,7 +362,7 @@ func (p *rolloutPlanner) reconcileDeadlockBreaker(ctx context.Context) { return } - // if there are scale operation in progress, no deadlock. + // If there are scale operation in progress, no deadlock. // Note: we are considering both scale operation from previous and current reconcile. // Note: we are counting only pending scale up & down from the current status.replicas (so actual scale up & down of replicas number, not any other possible "re-alignment" of spec.replicas). for _, ms := range allMSs { @@ -376,26 +376,26 @@ func (p *rolloutPlanner) reconcileDeadlockBreaker(ctx context.Context) { } // if there are unavailable replicas on the newMS, wait for them to become available first. - // Note: a rollout cannot be unblocked if new machines do not become available. - // Note: if the replicas on the newMS are not going to become available for any issue either: + // Note: A rollout cannot be unblocked if new machines do not become available. + // Note: If the replicas on the newMS are not becoming available either: // - automatic remediation can help in addressing temporary failures. // - user intervention is required to fix more permanent issues e.g. to fix a wrong configuration. if ptr.Deref(p.newMS.Status.AvailableReplicas, 0) != ptr.Deref(p.newMS.Status.Replicas, 0) { return } - // At this point we can assume there is a deadlock that can be remediated by breaching maxUnavailability constraint + // At this point we can assume there is a deadlock that can only be remediated by breaching maxUnavailability constraint // and scaling down an oldMS with unavailable machines by one. // - // Note: in most cases this is only a formal violation of maxUnavailability, because there is a good changes - // that the machine that will be deleted is one of the unavailable machines + // Note: in most cases this is only a formal violation of maxUnavailability, because there is a good chance + // that the machine that will be deleted is one of the unavailable machines. 
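// Illustrative example (numbers mirroring the Test_reconcileDeadlockBreaker cases in this series):
// the newMS has 5 replicas, all available, and an oldMS has 5 replicas of which only 3 are available,
// with no scale operation in progress; the rollout cannot make progress within the MaxUnavailable
// budget, so the oldMS is scaled down from 5 to 4, and in the likely case that the MS controller
// removes one of the two unavailable Machines, availability is not actually reduced.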
for _, oldMS := range p.oldMSs { if ptr.Deref(oldMS.Status.AvailableReplicas, 0) == ptr.Deref(oldMS.Status.Replicas, 0) || ptr.Deref(oldMS.Spec.Replicas, 0) == 0 { continue } newScaleIntent := max(ptr.Deref(oldMS.Spec.Replicas, 0)-1, 0) - log.Info(fmt.Sprintf("Setting scale down intent for %s to %d replicas (-%d) to unblock rollout stuck due to unavailable machine on oldMS only", oldMS.Name, newScaleIntent, 1), "MachineSet", klog.KObj(oldMS)) + log.Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d) to unblock rollout stuck due to unavailable Machine on oldMS", oldMS.Name, newScaleIntent, 1), "MachineSet", klog.KObj(oldMS)) p.scaleIntents[oldMS.Name] = newScaleIntent return } diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go index 98c6f28336f9..f45e0047ef97 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go @@ -303,7 +303,7 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { }, expectScaleIntent: map[string]int32{ // no new scale down intent for oldMSs (ms1): - // 2 available replicas from ms1 + 1 available replica from ms22 = 3 available replicas <= minAvailability, we cannot scale down + // 2 available replicas from ms1 + 1 available replica from ms2 = 3 available replicas == minAvailability, we cannot scale down }, }, { @@ -316,7 +316,7 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { }, expectScaleIntent: map[string]int32{ // no new scale down intent for oldMSs (ms1): - // 1 available replicas from ms1 + 1 available replica from ms22 = 3 available replicas = minAvailability, we cannot scale down + // 1 available replicas from ms1 + 1 available replica from ms2 = 2 available replicas < minAvailability, we cannot scale down }, skipMaxUnavailabilityCheck: true, }, @@ -330,21 +330,21 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { }, expectScaleIntent: map[string]int32{ // no new scale down intent for oldMSs (ms1): - // 3 available replicas from ms1 - 1 replica already scaling down from ms1 + 1 available replica from ms2 = 3 available replicas = minAvailability, we cannot further scale down + // 3 available replicas from ms1 - 1 replica already scaling down from ms1 + 1 available replica from ms2 = 3 available replicas == minAvailability, we cannot further scale down }, }, { name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from a previous reconcile already takes the availability buffer, scale down from a previous reconcile on another MS", scaleIntent: map[string]int32{}, md: createMD("v2", 6, withRolloutStrategy(3, 1)), - newMS: createMS("ms3", "v2", 3, withStatusReplicas(0), withStatusAvailableReplicas(0)), // NewMS is scaling up from previous reconcile, but replicas do not exists yet + newMS: createMS("ms3", "v2", 3, withStatusReplicas(0), withStatusAvailableReplicas(0)), // NewMS is scaling up from previous reconcile, but replicas do not exist yet oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v1", 2, withStatusReplicas(3), withStatusAvailableReplicas(3)), // OldMS is scaling down from a previous reconcile createMS("ms2", "v1", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), // OldMS }, expectScaleIntent: map[string]int32{ // no new scale down intent for oldMSs (ms1): - // 3 available replicas from ms1 - 1 replica already 
scaling down from ms1 + 3 available replica from ms2 = 5 available replicas = minAvailability, we cannot further scale down + // 3 available replicas from ms1 - 1 replica already scaling down from ms1 + 3 available replica from ms2 = 5 available replicas == minAvailability, we cannot further scale down }, }, { @@ -360,7 +360,7 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { expectScaleIntent: map[string]int32{ "ms2": 1, // no new scale down intent for oldMSs (ms1): - // 2 available replicas from ms1 + 2 available replicas from ms2 - 1 replica already scaling down from ms2 = 3 available replicas = minAvailability, we cannot further scale down + // 2 available replicas from ms1 + 2 available replicas from ms2 - 1 replica already scaling down from ms2 = 3 available replicas == minAvailability, we cannot further scale down }, }, { @@ -373,7 +373,7 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { }, expectScaleIntent: map[string]int32{ // no new scale down intent for oldMSs (ms1): - // 3 available replicas from ms1 + 0 available replicas from ms2 = 3 available replicas = minAvailability, we cannot further scale down + // 3 available replicas from ms1 + 0 available replicas from ms2 = 3 available replicas == minAvailability, we cannot further scale down }, }, { @@ -386,7 +386,7 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { }, expectScaleIntent: map[string]int32{ // no new scale down intent for oldMSs (ms1): - // 2 available replicas from ms1 + 1 available replicas from ms2 = 3 available replicas = minAvailability, we cannot further scale down + // 2 available replicas from ms1 + 1 available replicas from ms2 = 3 available replicas == minAvailability, we cannot further scale down }, }, { @@ -415,8 +415,8 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { expectScaleIntent: map[string]int32{ // new scale down intent for oldMSs: // ms1 skipped in the first iteration because it does not have any unavailable replica - "ms2": 0, // 0 available replicas from ms2, it can be scaled down with impact on availability (-1) - "ms3": 0, // 0 available replicas from ms3, it can be scaled down with impact on availability (-2) + "ms2": 0, // 0 available replicas from ms2, it can be scaled down without impact on availability (-1) + "ms3": 0, // 0 available replicas from ms3, it can be scaled down without impact on availability (-2) // no need to further scale down. 
}, }, @@ -434,8 +434,8 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { // new scale down intent for oldMSs: // ms1 skipped in the first iteration because it does not have any unavailable replica "ms2": 0, // 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-1) - "ms3": 0, // 0 available replicas from ms3, it can be scaled down to 0 without any impact on availability (-1) - "ms1": 2, // 3 available replicas from ms2 + 1 available replica from ms4 = 4 available replicas > minAvailability, scale down to 2 replicas (-1) + "ms3": 0, // 0 available replicas from ms3, it can be scaled down to 0 without any impact on availability (-2) + "ms1": 2, // 3 available replicas from ms1 + 1 available replica from ms4 = 4 available replicas > minAvailability, scale down to 2 replicas (-1) }, }, { @@ -446,14 +446,14 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { oldMSs: []*clusterv1.MachineSet{ createMS("ms1", "v0", 4, withStatusReplicas(3), withStatusAvailableReplicas(3)), // 1 replica without machine createMS("ms2", "v1", 2, withStatusReplicas(1), withStatusAvailableReplicas(0)), // no replicas are available, 1 replica without machine - createMS("ms3", "v2", 5, withStatusReplicas(2), withStatusAvailableReplicas(0)), // no replicas are available, 2 replicas without machines + createMS("ms3", "v2", 5, withStatusReplicas(2), withStatusAvailableReplicas(0)), // no replicas are available, 3 replicas without machines }, expectScaleIntent: map[string]int32{ // new scale down intent for oldMSs: // ms1 skipped in the first iteration because it does not have any unavailable replica - "ms2": 0, // 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-1) + "ms2": 0, // 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-2) "ms3": 0, // 0 available replicas from ms3, it can be scaled down to 0 without any impact on availability (-5) - "ms1": 2, // 3 available replicas from ms2 + 1 available replica from ms4 = 4 available replicas > minAvailability, scale down to 2 replicas, also drop the replica without machines (-2) + "ms1": 2, // 3 available replicas from ms1 + 1 available replica from ms4 = 4 available replicas > minAvailability, scale down to 2 replicas, also drop the replica without machine (-2) }, }, { @@ -470,7 +470,7 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { // new scale down intent for oldMSs: "ms2": 0, // 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-1) // even if there is still room to scale down, we cannot scale down ms3: 1 available replica from ms1 + 1 available replica from ms4 = 2 available replicas < minAvailability - // does not make sense to continue scale down. 
+ // does not make sense to continue scale down as there is no guarantee that MS3 would remove the unavailable replica }, }, { @@ -486,7 +486,7 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { expectScaleIntent: map[string]int32{ // new scale down intent for oldMSs: "ms1": 1, // 1 replica without machine, it can be scaled down to 1 without any impact on availability (-1) - "ms2": 0, // 1 replica without machine, 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-2) + "ms2": 0, // 2 replica without machine, 0 available replicas from ms2, it can be scaled down to 0 without any impact on availability (-3) "ms3": 2, // 1 replica without machine, it can be scaled down to 2 without any impact on availability (-1); even if there is still room to scale down, we cannot further scale down ms3: 1 available replica from ms1 + 1 available replica from ms4 = 2 available replicas < minAvailability // does not make sense to continue scale down. }, @@ -549,7 +549,7 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { minAvailableReplicas := ptr.Deref(tt.md.Spec.Replicas, 0) - mdutil.MaxUnavailable(*tt.md) totAvailableReplicas := ptr.Deref(mdutil.GetAvailableReplicaCountForMachineSets(append(tt.oldMSs, tt.newMS)), 0) if !tt.skipMaxUnavailabilityCheck { - g.Expect(totAvailableReplicas).To(BeNumerically(">=", minAvailableReplicas), "totAvailable machines is less than MinUnavailable") + g.Expect(totAvailableReplicas).To(BeNumerically(">=", minAvailableReplicas), "totAvailable machines is less than minAvailable") } else { t.Logf("skipping MaxUnavailability check (totAvailableReplicas: %d, minAvailableReplicas: %d)", totAvailableReplicas, minAvailableReplicas) } From 40f23d9ea280d6e6faedf4a868cce0f927027174 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Tue, 7 Oct 2025 15:12:37 +0200 Subject: [PATCH 5/5] More comments --- .../machinedeployment_rolling.go | 13 +++---- .../machinedeployment_rolling_test.go | 37 +++++++++++++++++-- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling.go b/internal/controllers/machinedeployment/machinedeployment_rolling.go index cb01586c13e7..d2e4a8556527 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling.go @@ -263,7 +263,7 @@ func (p *rolloutPlanner) scaleDownOldMSs(ctx context.Context, totalScaleDownCoun replicas := ptr.Deref(oldMS.Spec.Replicas, 0) if v, ok := p.scaleIntents[oldMS.Name]; ok { - replicas = min(replicas, v) + replicas = v } // No op if this MS has been already scaled down to zero. @@ -364,13 +364,11 @@ func (p *rolloutPlanner) reconcileDeadlockBreaker(ctx context.Context) { // If there are scale operation in progress, no deadlock. // Note: we are considering both scale operation from previous and current reconcile. - // Note: we are counting only pending scale up & down from the current status.replicas (so actual scale up & down of replicas number, not any other possible "re-alignment" of spec.replicas). 
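// Illustrative example (hypothetical numbers): an MS with spec.replicas 4 but status.replicas 5 still
// has a Machine going away from a previous reconcile, and an MS with a scale intent recorded in the
// current reconcile is about to be resized; in both cases the rollout is still moving, so it is too
// early to treat unavailable replicas on oldMSs as a deadlock.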
for _, ms := range allMSs { - scaleIntent := ptr.Deref(ms.Spec.Replicas, 0) - if v, ok := p.scaleIntents[ms.Name]; ok { - scaleIntent = v + if ptr.Deref(ms.Spec.Replicas, 0) != ptr.Deref(ms.Status.Replicas, 0) { + return } - if scaleIntent != ptr.Deref(ms.Status.Replicas, 0) { + if _, ok := p.scaleIntents[ms.Name]; ok { return } } @@ -387,8 +385,9 @@ func (p *rolloutPlanner) reconcileDeadlockBreaker(ctx context.Context) { // At this point we can assume there is a deadlock that can only be remediated by breaching maxUnavailability constraint // and scaling down an oldMS with unavailable machines by one. // - // Note: in most cases this is only a formal violation of maxUnavailability, because there is a good chance + // Note: In most cases this is only a formal violation of maxUnavailability, because there is a good chance // that the machine that will be deleted is one of the unavailable machines. + // Note: This for loop relies on the same ordering of oldMSs that has been applied by reconcileOldMachineSetsRolloutRolling. for _, oldMS := range p.oldMSs { if ptr.Deref(oldMS.Status.AvailableReplicas, 0) == ptr.Deref(oldMS.Status.Replicas, 0) || ptr.Deref(oldMS.Spec.Replicas, 0) == 0 { continue diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go index f45e0047ef97..ab8bfff996ab 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go @@ -348,7 +348,7 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { }, }, { - name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from current reconcile already takes the availability buffer", + name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from current reconcile already takes the availability buffer (newMS is scaling down)", scaleIntent: map[string]int32{ "ms2": 1, // newMS (ms2) has a scaling down intent from current reconcile }, @@ -363,6 +363,22 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { // 2 available replicas from ms1 + 2 available replicas from ms2 - 1 replica already scaling down from ms2 = 3 available replicas == minAvailability, we cannot further scale down }, }, + { + name: "do not scale down if there are more replicas than minAvailable replicas, but scale down from current reconcile already takes the availability buffer (oldMS is scaling down)", + scaleIntent: map[string]int32{ + "ms1": 1, // oldMS (ms1) has a scaling down intent from current reconcile + }, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v2", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v1", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + }, + expectScaleIntent: map[string]int32{ + // no new scale down intent for oldMSs: + "ms1": 1, + // 2 available replicas from ms1 - 1 replica already scaling down from ms1 + 2 available replicas from ms2 = 3 available replicas == minAvailability, we cannot further scale down + }, + }, { name: "do not scale down replicas when there are more replicas than minAvailable replicas, but not all the replicas are available (unavailability on newMS)", scaleIntent: map[string]int32{}, @@ -501,11 +517,11 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { }, expectScaleIntent: map[string]int32{ 
// new scale down intent for oldMSs: - "ms1": 2, // 4 available replicas from ms1 - 1 replica already scaling down from ms1 + 2 available replicas from ms2 = 4 available replicas > minAvailability, scale down to 2 (-1) + "ms1": 2, // 4 available replicas from ms1 - 1 replica already scaling down from ms1 + 2 available replicas from ms2 = 5 available replicas > minAvailability, scale down to 2 (-1) }, }, { - name: "scale down replicas when there are more replicas than minAvailable replicas, scale down keeps into account scale downs from the current reconcile", + name: "scale down replicas when there are more replicas than minAvailable replicas, scale down keeps into account scale downs from the current reconcile (newMS is scaling down)", scaleIntent: map[string]int32{ "ms2": 1, // newMS (ms2) has a scaling down intent from current reconcile }, @@ -520,6 +536,21 @@ func Test_reconcileOldMachineSetsRolloutRolling(t *testing.T) { "ms1": 2, // 3 available replicas from ms1 + 2 available replicas from ms2 - 1 replica already scaling down from ms2 = 4 available replicas > minAvailability, scale down to 2 (-1) }, }, + { + name: "scale down replicas when there are more replicas than minAvailable replicas, scale down keeps into account scale downs from the current reconcile (oldMS is scaling down)", + scaleIntent: map[string]int32{ + "ms1": 2, // oldMS (ms1) has a scaling down intent from current reconcile + }, + md: createMD("v2", 3, withRolloutStrategy(1, 0)), + newMS: createMS("ms2", "v3", 2, withStatusReplicas(2), withStatusAvailableReplicas(2)), + oldMSs: []*clusterv1.MachineSet{ + createMS("ms1", "v0", 3, withStatusReplicas(3), withStatusAvailableReplicas(3)), + }, + expectScaleIntent: map[string]int32{ + // new scale down intent for oldMSs: + "ms1": 1, // 3 available replicas from ms1 - 1 replica already scaling down from ms1 + 2 available replicas from ms2 = 4 available replicas > minAvailability, scale down to 1 (-1) + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) {