Skip to content

Commit 83b8694

Browse files
authored
Merge pull request #8658 from ykakarap/pr-topology-cp-upgrade
🌱 upgrading control plane should only be blocked if MD are upgrading (not just rolling out)
2 parents 6f0fe30 + 80576b0 commit 83b8694

File tree

8 files changed

+47
-157
lines changed

8 files changed

+47
-157
lines changed

internal/controllers/topology/cluster/conditions.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,11 +150,6 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
150150
fmt.Fprintf(msgBuilder, " MachineDeployment(s) %s are upgrading",
151151
computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.UpgradingNames()),
152152
)
153-
154-
case s.Current.MachineDeployments.IsAnyRollingOut():
155-
fmt.Fprintf(msgBuilder, " MachineDeployment(s) %s are rolling out",
156-
computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.RolloutNames()),
157-
)
158153
}
159154

160155
conditions.Set(

internal/controllers/topology/cluster/conditions_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
196196
wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. Control plane is reconciling desired replicas",
197197
},
198198
{
199-
name: "should set the condition to false if new version is not picked up because at least one of the machine deployment is rolling out",
199+
name: "should set the condition to false if new version is not picked up because at least one of the machine deployment is upgrading",
200200
reconcileErr: nil,
201201
cluster: &clusterv1.Cluster{},
202202
s: &scope.Scope{
@@ -231,14 +231,14 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
231231
UpgradeTracker: func() *scope.UpgradeTracker {
232232
ut := scope.NewUpgradeTracker()
233233
ut.ControlPlane.PendingUpgrade = true
234-
ut.MachineDeployments.MarkRollingOut("md0-abc123")
234+
ut.MachineDeployments.MarkUpgrading("md0-abc123")
235235
return ut
236236
}(),
237237
HookResponseTracker: scope.NewHookResponseTracker(),
238238
},
239239
wantConditionStatus: corev1.ConditionFalse,
240240
wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason,
241-
wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. MachineDeployment(s) md0-abc123 are rolling out",
241+
wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading",
242242
},
243243
{
244244
name: "should set the condition to false if control plane picked the new version but machine deployments did not because control plane is upgrading",
@@ -451,7 +451,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
451451
UpgradeTracker: func() *scope.UpgradeTracker {
452452
ut := scope.NewUpgradeTracker()
453453
ut.ControlPlane.PendingUpgrade = false
454-
ut.MachineDeployments.MarkUpgradingAndRollingOut("md0-abc123")
454+
ut.MachineDeployments.MarkUpgrading("md0-abc123")
455455
ut.MachineDeployments.MarkPendingUpgrade("md1-abc123")
456456
return ut
457457
}(),

internal/controllers/topology/cluster/desired_state.go

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,17 @@ func (r *Reconciler) computeDesiredState(ctx context.Context, s *scope.Scope) (*
6363
}
6464
}
6565

66+
// Mark all the MachineDeployments that are currently upgrading.
67+
// This captured information is used for:
68+
// - Building the TopologyReconciled condition.
69+
// - Make upgrade decisions on the control plane.
70+
// - Making upgrade decisions on machine deployments.
71+
mdUpgradingNames, err := s.Current.MachineDeployments.Upgrading(ctx, r.Client)
72+
if err != nil {
73+
return nil, errors.Wrap(err, "failed to check if any MachineDeployment is upgrading")
74+
}
75+
s.UpgradeTracker.MachineDeployments.MarkUpgrading(mdUpgradingNames...)
76+
6677
// Compute the desired state of the ControlPlane object, eventually adding a reference to the
6778
// InfrastructureMachineTemplate generated by the previous step.
6879
if desiredState.ControlPlane.Object, err = r.computeControlPlane(ctx, s, desiredState.ControlPlane.InfrastructureMachineTemplate); err != nil {
@@ -424,10 +435,10 @@ func (r *Reconciler) computeControlPlaneVersion(ctx context.Context, s *scope.Sc
424435
}
425436

426437
// If the control plane is not upgrading or scaling, we can assume the control plane is stable.
427-
// However, we should also check for the MachineDeployments to be stable.
428-
// If the MachineDeployments are rolling out (still completing a previous upgrade), then do not pick
429-
// up the desiredVersion yet. We will pick up the new version after the MachineDeployments are stable.
430-
if s.Current.MachineDeployments.IsAnyRollingOut() {
438+
// However, we should also check for the MachineDeployments upgrading.
439+
// If the MachineDeployments are upgrading, then do not pick up the desiredVersion yet.
440+
// We will pick up the new version after the MachineDeployments finish upgrading.
441+
if len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) > 0 {
431442
return *currentVersion, nil
432443
}
433444

@@ -524,21 +535,6 @@ func calculateRefDesiredAPIVersion(currentRef *corev1.ObjectReference, desiredRe
524535

525536
// computeMachineDeployments computes the desired state of the list of MachineDeployments.
526537
func (r *Reconciler) computeMachineDeployments(ctx context.Context, s *scope.Scope, desiredControlPlaneState *scope.ControlPlaneState) (scope.MachineDeploymentsStateMap, error) {
527-
// Mark all the MachineDeployments that are currently upgrading.
528-
// This captured information is used for:
529-
// - Building the TopologyReconciled condition.
530-
// - Making upgrade decisions on machine deployments.
531-
upgradingMDs, err := s.Current.MachineDeployments.Upgrading(ctx, r.Client)
532-
if err != nil {
533-
return nil, err
534-
}
535-
s.UpgradeTracker.MachineDeployments.MarkUpgradingAndRollingOut(upgradingMDs...)
536-
537-
// Mark all MachineDeployments that are currently rolling out.
538-
// This captured information is used for:
539-
// - Building the TopologyReconciled condition (when control plane upgrade is pending)
540-
s.UpgradeTracker.MachineDeployments.MarkRollingOut(s.Current.MachineDeployments.RollingOut()...)
541-
542538
machineDeploymentsStateMap := make(scope.MachineDeploymentsStateMap)
543539
for _, mdTopology := range s.Blueprint.Topology.Workers.MachineDeployments {
544540
desiredMachineDeployment, err := computeMachineDeployment(ctx, s, desiredControlPlaneState, mdTopology)
@@ -854,7 +850,7 @@ func computeMachineDeploymentVersion(s *scope.Scope, machineDeploymentTopology c
854850

855851
// Control plane and machine deployments are stable.
856852
// Ready to pick up the topology version.
857-
s.UpgradeTracker.MachineDeployments.MarkUpgradingAndRollingOut(currentMDState.Object.Name)
853+
s.UpgradeTracker.MachineDeployments.MarkUpgrading(currentMDState.Object.Name)
858854
return desiredVersion, nil
859855
}
860856

internal/controllers/topology/cluster/desired_state_test.go

Lines changed: 22 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -637,43 +637,6 @@ func TestComputeControlPlaneVersion(t *testing.T) {
637637
t.Run("Compute control plane version under various circumstances", func(t *testing.T) {
638638
defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.RuntimeSDK, true)()
639639

640-
// Note: the version used by the machine deployments does
641-
// not affect how we're determining the control plane version.
642-
// We only want to know if the machine deployments are stable.
643-
//
644-
// A machine deployment is considered stable if all the following are true:
645-
// - md.spec.replicas == md.status.replicas
646-
// - md.spec.replicas == md.status.updatedReplicas
647-
// - md.spec.replicas == md.status.readyReplicas
648-
// - md.Generation < md.status.observedGeneration
649-
//
650-
// A machine deployment is considered upgrading if any of the above conditions
651-
// is false.
652-
machineDeploymentStable := builder.MachineDeployment("test-namespace", "md1").
653-
WithGeneration(int64(1)).
654-
WithReplicas(int32(2)).
655-
WithStatus(clusterv1.MachineDeploymentStatus{
656-
ObservedGeneration: 2,
657-
Replicas: 2,
658-
UpdatedReplicas: 2,
659-
AvailableReplicas: 2,
660-
ReadyReplicas: 2,
661-
UnavailableReplicas: 0,
662-
}).
663-
Build()
664-
machineDeploymentRollingOut := builder.MachineDeployment("test-namespace", "md2").
665-
WithGeneration(int64(1)).
666-
WithReplicas(int32(2)).
667-
WithStatus(clusterv1.MachineDeploymentStatus{
668-
ObservedGeneration: 2,
669-
Replicas: 1,
670-
UpdatedReplicas: 1,
671-
AvailableReplicas: 1,
672-
ReadyReplicas: 1,
673-
UnavailableReplicas: 0,
674-
}).
675-
Build()
676-
677640
nonBlockingBeforeClusterUpgradeResponse := &runtimehooksv1.BeforeClusterUpgradeResponse{
678641
CommonRetryResponse: runtimehooksv1.CommonRetryResponse{
679642
CommonResponse: runtimehooksv1.CommonResponse{
@@ -708,13 +671,13 @@ func TestComputeControlPlaneVersion(t *testing.T) {
708671
}
709672

710673
tests := []struct {
711-
name string
712-
hookResponse *runtimehooksv1.BeforeClusterUpgradeResponse
713-
topologyVersion string
714-
controlPlaneObj *unstructured.Unstructured
715-
machineDeploymentsState scope.MachineDeploymentsStateMap
716-
expectedVersion string
717-
wantErr bool
674+
name string
675+
hookResponse *runtimehooksv1.BeforeClusterUpgradeResponse
676+
topologyVersion string
677+
controlPlaneObj *unstructured.Unstructured
678+
upgradingMachineDeployments []string
679+
expectedVersion string
680+
wantErr bool
718681
}{
719682
{
720683
name: "should return cluster.spec.topology.version if creating a new control plane",
@@ -779,7 +742,7 @@ func TestComputeControlPlaneVersion(t *testing.T) {
779742
expectedVersion: "v1.2.2",
780743
},
781744
{
782-
name: "should return controlplane.spec.version if control plane is not upgrading and not scaling and one of the machine deployments is rolling out",
745+
name: "should return controlplane.spec.version if control plane is not upgrading and not scaling and one of the machine deployments is upgrading",
783746
topologyVersion: "v1.2.3",
784747
controlPlaneObj: builder.ControlPlane("test1", "cp1").
785748
WithSpecFields(map[string]interface{}{
@@ -794,14 +757,11 @@ func TestComputeControlPlaneVersion(t *testing.T) {
794757
"status.unavailableReplicas": int64(0),
795758
}).
796759
Build(),
797-
machineDeploymentsState: scope.MachineDeploymentsStateMap{
798-
"md1": &scope.MachineDeploymentState{Object: machineDeploymentStable},
799-
"md2": &scope.MachineDeploymentState{Object: machineDeploymentRollingOut},
800-
},
801-
expectedVersion: "v1.2.2",
760+
upgradingMachineDeployments: []string{"md1"},
761+
expectedVersion: "v1.2.2",
802762
},
803763
{
804-
name: "should return cluster.spec.topology.version if control plane is not upgrading and not scaling and none of the machine deployments are rolling out - hook returns non blocking response",
764+
name: "should return cluster.spec.topology.version if control plane is not upgrading and not scaling and none of the machine deployments are upgrading - hook returns non blocking response",
805765
hookResponse: nonBlockingBeforeClusterUpgradeResponse,
806766
topologyVersion: "v1.2.3",
807767
controlPlaneObj: builder.ControlPlane("test1", "cp1").
@@ -817,11 +777,8 @@ func TestComputeControlPlaneVersion(t *testing.T) {
817777
"status.unavailableReplicas": int64(0),
818778
}).
819779
Build(),
820-
machineDeploymentsState: scope.MachineDeploymentsStateMap{
821-
"md1": &scope.MachineDeploymentState{Object: machineDeploymentStable},
822-
"md2": &scope.MachineDeploymentState{Object: machineDeploymentStable},
823-
},
824-
expectedVersion: "v1.2.3",
780+
upgradingMachineDeployments: []string{},
781+
expectedVersion: "v1.2.3",
825782
},
826783
{
827784
name: "should return the controlplane.spec.version if the BeforeClusterUpgrade hooks returns a blocking response",
@@ -840,10 +797,6 @@ func TestComputeControlPlaneVersion(t *testing.T) {
840797
"status.unavailableReplicas": int64(0),
841798
}).
842799
Build(),
843-
machineDeploymentsState: scope.MachineDeploymentsStateMap{
844-
"md1": &scope.MachineDeploymentState{Object: machineDeploymentStable},
845-
"md2": &scope.MachineDeploymentState{Object: machineDeploymentStable},
846-
},
847800
expectedVersion: "v1.2.2",
848801
},
849802
{
@@ -863,10 +816,6 @@ func TestComputeControlPlaneVersion(t *testing.T) {
863816
"status.unavailableReplicas": int64(0),
864817
}).
865818
Build(),
866-
machineDeploymentsState: scope.MachineDeploymentsStateMap{
867-
"md1": &scope.MachineDeploymentState{Object: machineDeploymentStable},
868-
"md2": &scope.MachineDeploymentState{Object: machineDeploymentStable},
869-
},
870819
expectedVersion: "v1.2.2",
871820
wantErr: true,
872821
},
@@ -889,12 +838,14 @@ func TestComputeControlPlaneVersion(t *testing.T) {
889838
Namespace: "test-ns",
890839
},
891840
},
892-
ControlPlane: &scope.ControlPlaneState{Object: tt.controlPlaneObj},
893-
MachineDeployments: tt.machineDeploymentsState,
841+
ControlPlane: &scope.ControlPlaneState{Object: tt.controlPlaneObj},
894842
},
895843
UpgradeTracker: scope.NewUpgradeTracker(),
896844
HookResponseTracker: scope.NewHookResponseTracker(),
897845
}
846+
if len(tt.upgradingMachineDeployments) > 0 {
847+
s.UpgradeTracker.MachineDeployments.MarkUpgrading(tt.upgradingMachineDeployments...)
848+
}
898849

899850
runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder().
900851
WithCatalog(catalog).
@@ -1723,7 +1674,6 @@ func TestComputeMachineDeployment(t *testing.T) {
17231674
s.Current.ControlPlane = &scope.ControlPlaneState{
17241675
Object: controlPlaneStable123,
17251676
}
1726-
s.UpgradeTracker.MachineDeployments.MarkRollingOut(s.Current.MachineDeployments.RollingOut()...)
17271677
desiredControlPlaneState := &scope.ControlPlaneState{
17281678
Object: controlPlaneStable123,
17291679
}
@@ -1733,7 +1683,7 @@ func TestComputeMachineDeployment(t *testing.T) {
17331683
Name: "big-pool-of-machines",
17341684
Replicas: pointer.Int32(2),
17351685
}
1736-
s.UpgradeTracker.MachineDeployments.MarkUpgradingAndRollingOut(tt.upgradingMachineDeployments...)
1686+
s.UpgradeTracker.MachineDeployments.MarkUpgrading(tt.upgradingMachineDeployments...)
17371687
obj, err := computeMachineDeployment(ctx, s, desiredControlPlaneState, mdTopology)
17381688
g.Expect(err).NotTo(HaveOccurred())
17391689
g.Expect(*obj.Object.Spec.Template.Spec.Version).To(Equal(tt.expectedVersion))
@@ -1883,7 +1833,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
18831833
expectedVersion: "v1.2.2",
18841834
},
18851835
{
1886-
name: "should return cluster.spec.topology.version if the control plane is not upgrading, not scaling, not ready to upgrade and none of the machine deployments are rolling out",
1836+
name: "should return cluster.spec.topology.version if the control plane is not upgrading, not scaling, not ready to upgrade and none of the machine deployments are upgrading",
18871837
currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()},
18881838
upgradingMachineDeployments: []string{},
18891839
currentControlPlane: controlPlaneStable123,
@@ -1892,7 +1842,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
18921842
expectedVersion: "v1.2.3",
18931843
},
18941844
{
1895-
name: "should return cluster.spec.topology.version if control plane is stable, other machine deployments are rolling out, concurrency limit not reached",
1845+
name: "should return cluster.spec.topology.version if control plane is stable, other machine deployments are upgrading, concurrency limit not reached",
18961846
currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()},
18971847
upgradingMachineDeployments: []string{"upgrading-md1"},
18981848
upgradeConcurrency: 2,
@@ -1902,7 +1852,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
19021852
expectedVersion: "v1.2.3",
19031853
},
19041854
{
1905-
name: "should return machine deployment's spec.template.spec.version if control plane is stable, other machine deployments are rolling out, concurrency limit reached",
1855+
name: "should return machine deployment's spec.template.spec.version if control plane is stable, other machine deployments are upgrading, concurrency limit reached",
19061856
currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()},
19071857
upgradingMachineDeployments: []string{"upgrading-md1", "upgrading-md2"},
19081858
upgradeConcurrency: 2,
@@ -1931,7 +1881,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
19311881
UpgradeTracker: scope.NewUpgradeTracker(scope.MaxMDUpgradeConcurrency(tt.upgradeConcurrency)),
19321882
}
19331883
desiredControlPlaneState := &scope.ControlPlaneState{Object: tt.desiredControlPlane}
1934-
s.UpgradeTracker.MachineDeployments.MarkUpgradingAndRollingOut(tt.upgradingMachineDeployments...)
1884+
s.UpgradeTracker.MachineDeployments.MarkUpgrading(tt.upgradingMachineDeployments...)
19351885
version, err := computeMachineDeploymentVersion(s, tt.machineDeploymentTopology, desiredControlPlaneState, tt.currentMachineDeploymentState)
19361886
g.Expect(err).NotTo(HaveOccurred())
19371887
g.Expect(version).To(Equal(tt.expectedVersion))

internal/controllers/topology/cluster/reconcile_state.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ func (r *Reconciler) callAfterClusterUpgrade(ctx context.Context, s *scope.Scope
264264
}
265265

266266
if !cpUpgrading && !cpScaling && !s.UpgradeTracker.ControlPlane.PendingUpgrade && // Control Plane checks
267-
len(s.UpgradeTracker.MachineDeployments.RolloutNames()) == 0 && // Machine deployments are not rollout out or not about to roll out
267+
len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) == 0 && // Machine deployments are not upgrading or not about to upgrade
268268
!s.UpgradeTracker.MachineDeployments.PendingUpgrade() && // No MachineDeployments have an upgrade pending
269269
!s.UpgradeTracker.MachineDeployments.DeferredUpgrade() { // No MachineDeployments have an upgrade deferred
270270
// Everything is stable and the cluster can be considered fully upgraded.

internal/controllers/topology/cluster/reconcile_state_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) {
685685
wantError: false,
686686
},
687687
{
688-
name: "hook should not be called if the control plane is stable at desired version but MDs are rolling out - hook is marked",
688+
name: "hook should not be called if the control plane is stable at desired version but MDs are upgrading - hook is marked",
689689
s: &scope.Scope{
690690
Blueprint: &scope.ClusterBlueprint{
691691
Topology: &clusterv1.Topology{
@@ -713,7 +713,7 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) {
713713
UpgradeTracker: func() *scope.UpgradeTracker {
714714
ut := scope.NewUpgradeTracker()
715715
ut.ControlPlane.PendingUpgrade = false
716-
ut.MachineDeployments.MarkRollingOut("md1")
716+
ut.MachineDeployments.MarkUpgrading("md1")
717717
return ut
718718
}(),
719719
},

internal/controllers/topology/cluster/scope/state.go

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import (
2626
"sigs.k8s.io/controller-runtime/pkg/client"
2727

2828
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
29-
"sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
3029
)
3130

3231
// ClusterState holds all the objects representing the state of a managed Cluster topology.
@@ -62,24 +61,6 @@ type ControlPlaneState struct {
6261
// MachineDeploymentsStateMap holds a collection of MachineDeployment states.
6362
type MachineDeploymentsStateMap map[string]*MachineDeploymentState
6463

65-
// RollingOut returns the list of the machine deployments
66-
// that are rolling out.
67-
func (mds MachineDeploymentsStateMap) RollingOut() []string {
68-
names := []string{}
69-
for _, md := range mds {
70-
if md.IsRollingOut() {
71-
names = append(names, md.Object.Name)
72-
}
73-
}
74-
return names
75-
}
76-
77-
// IsAnyRollingOut returns true if at least one of the
78-
// machine deployments is rolling out. False, otherwise.
79-
func (mds MachineDeploymentsStateMap) IsAnyRollingOut() bool {
80-
return len(mds.RollingOut()) != 0
81-
}
82-
8364
// Upgrading returns the list of the machine deployments
8465
// that are upgrading.
8566
func (mds MachineDeploymentsStateMap) Upgrading(ctx context.Context, c client.Client) ([]string, error) {
@@ -112,15 +93,6 @@ type MachineDeploymentState struct {
11293
MachineHealthCheck *clusterv1.MachineHealthCheck
11394
}
11495

115-
// IsRollingOut determines if the machine deployment is rolling out.
116-
// A machine deployment is considered upgrading if:
117-
// - if any of the replicas of the machine deployment is not ready.
118-
func (md *MachineDeploymentState) IsRollingOut() bool {
119-
return !mdutil.DeploymentComplete(md.Object, &md.Object.Status) ||
120-
*md.Object.Spec.Replicas != md.Object.Status.ReadyReplicas ||
121-
md.Object.Status.UnavailableReplicas > 0
122-
}
123-
12496
// IsUpgrading determines if the MachineDeployment is upgrading.
12597
// A machine deployment is considered upgrading if at least one of the Machines of this
12698
// MachineDeployment has a different version.

0 commit comments

Comments
 (0)