Skip to content

Commit 0ba7f45

Browse files
authored
Merge pull request #8628 from ykakarap/pr-double-rollout_topology
🐛 topology controller should avoid unnecessary rollouts during upgrades
2 parents a1c6c44 + 0be8e6e commit 0ba7f45

File tree

10 files changed

+650
-375
lines changed

10 files changed

+650
-375
lines changed

api/v1beta1/condition_consts.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,11 @@ const (
286286
// not yet completed because Control Plane is not yet updated to match the desired topology spec.
287287
TopologyReconciledControlPlaneUpgradePendingReason = "ControlPlaneUpgradePending"
288288

289+
// TopologyReconciledMachineDeploymentsCreatePendingReason (Severity=Info) documents reconciliation of a Cluster topology
290+
// not yet completed because at least one of the MachineDeployments is yet to be created.
291+
// This generally happens because new MachineDeployment creations are held off while the ControlPlane is not stable.
292+
TopologyReconciledMachineDeploymentsCreatePendingReason = "MachineDeploymentsCreatePending"
293+
289294
// TopologyReconciledMachineDeploymentsUpgradePendingReason (Severity=Info) documents reconciliation of a Cluster topology
290295
// not yet completed because at least one of the MachineDeployments is not yet updated to match the desired topology spec.
291296
TopologyReconciledMachineDeploymentsUpgradePendingReason = "MachineDeploymentsUpgradePending"

internal/controllers/topology/cluster/conditions.go

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
5656
)
5757
return nil
5858
}
59+
5960
// If an error occurred during reconciliation set the TopologyReconciled condition to false.
6061
// Add the error message from the reconcile function to the message of the condition.
6162
if reconcileErr != nil {
@@ -108,25 +109,37 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
108109
// * either the Control Plane or any of the MachineDeployments are still pending to pick up the new version
109110
// (generally happens when upgrading the cluster)
110111
// * when there are MachineDeployments for which the upgrade has been deferred
111-
if s.UpgradeTracker.ControlPlane.PendingUpgrade ||
112-
s.UpgradeTracker.MachineDeployments.PendingUpgrade() ||
112+
// * when new MachineDeployments are pending to be created
113+
// (generally happens when upgrading the cluster)
114+
if s.UpgradeTracker.ControlPlane.IsPendingUpgrade ||
115+
s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate() ||
116+
s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade() ||
113117
s.UpgradeTracker.MachineDeployments.DeferredUpgrade() {
114118
msgBuilder := &strings.Builder{}
115119
var reason string
116120

121+
// TODO(ykakarap): Evaluate potential improvements to building the condition. Multiple causes can trigger the
122+
// condition to be false at the same time (Example: ControlPlane.IsPendingUpgrade and MachineDeployments.IsAnyPendingCreate can
123+
// occur at the same time). Find better wording and `Reason` for the condition so that the condition can be rich
124+
// with all the relevant information.
117125
switch {
118-
case s.UpgradeTracker.ControlPlane.PendingUpgrade:
119-
fmt.Fprintf(msgBuilder, "Control plane upgrade to %s on hold.", s.Blueprint.Topology.Version)
126+
case s.UpgradeTracker.ControlPlane.IsPendingUpgrade:
127+
fmt.Fprintf(msgBuilder, "Control plane rollout and upgrade to version %s on hold.", s.Blueprint.Topology.Version)
120128
reason = clusterv1.TopologyReconciledControlPlaneUpgradePendingReason
121-
case s.UpgradeTracker.MachineDeployments.PendingUpgrade():
122-
fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s upgrade to version %s on hold.",
123-
computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.PendingUpgradeNames()),
129+
case s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade():
130+
fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s on hold.",
131+
computeNameList(s.UpgradeTracker.MachineDeployments.PendingUpgradeNames()),
124132
s.Blueprint.Topology.Version,
125133
)
126134
reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason
135+
case s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate():
136+
fmt.Fprintf(msgBuilder, "MachineDeployment(s) for Topologies %s creation on hold.",
137+
computeNameList(s.UpgradeTracker.MachineDeployments.PendingCreateTopologyNames()),
138+
)
139+
reason = clusterv1.TopologyReconciledMachineDeploymentsCreatePendingReason
127140
case s.UpgradeTracker.MachineDeployments.DeferredUpgrade():
128-
fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s upgrade to version %s deferred.",
129-
computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.DeferredUpgradeNames()),
141+
fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s deferred.",
142+
computeNameList(s.UpgradeTracker.MachineDeployments.DeferredUpgradeNames()),
130143
s.Blueprint.Topology.Version,
131144
)
132145
reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason
@@ -148,7 +161,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
148161

149162
case len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) > 0:
150163
fmt.Fprintf(msgBuilder, " MachineDeployment(s) %s are upgrading",
151-
computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.UpgradingNames()),
164+
computeNameList(s.UpgradeTracker.MachineDeployments.UpgradingNames()),
152165
)
153166
}
154167

@@ -175,12 +188,13 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
175188
return nil
176189
}
177190

178-
// computeMachineDeploymentNameList computes the MD name list to be shown in conditions.
179-
// It shortens the list to at most 5 MachineDeployment names.
180-
func computeMachineDeploymentNameList(mdList []string) any {
181-
if len(mdList) > 5 {
182-
mdList = append(mdList[:5], "...")
191+
// computeNameList computes list of names form the given list to be shown in conditions.
192+
// It shortens the list to at most 5 names and adds an ellipsis at the end if the list
193+
// has more than 5 elements.
194+
func computeNameList(list []string) any {
195+
if len(list) > 5 {
196+
list = append(list[:5], "...")
183197
}
184198

185-
return strings.Join(mdList, ", ")
199+
return strings.Join(list, ", ")
186200
}

internal/controllers/topology/cluster/conditions_test.go

Lines changed: 52 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -123,15 +123,15 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
123123
},
124124
UpgradeTracker: func() *scope.UpgradeTracker {
125125
ut := scope.NewUpgradeTracker()
126-
ut.ControlPlane.PendingUpgrade = true
126+
ut.ControlPlane.IsPendingUpgrade = true
127127
ut.ControlPlane.IsProvisioning = true
128128
return ut
129129
}(),
130130
HookResponseTracker: scope.NewHookResponseTracker(),
131131
},
132132
wantConditionStatus: corev1.ConditionFalse,
133133
wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason,
134-
wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. Control plane is completing initial provisioning",
134+
wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is completing initial provisioning",
135135
},
136136
{
137137
name: "should set the condition to false if new version is not picked up because control plane is upgrading",
@@ -154,15 +154,15 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
154154
},
155155
UpgradeTracker: func() *scope.UpgradeTracker {
156156
ut := scope.NewUpgradeTracker()
157-
ut.ControlPlane.PendingUpgrade = true
157+
ut.ControlPlane.IsPendingUpgrade = true
158158
ut.ControlPlane.IsUpgrading = true
159159
return ut
160160
}(),
161161
HookResponseTracker: scope.NewHookResponseTracker(),
162162
},
163163
wantConditionStatus: corev1.ConditionFalse,
164164
wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason,
165-
wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. Control plane is upgrading to version v1.21.2",
165+
wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.21.2",
166166
},
167167
{
168168
name: "should set the condition to false if new version is not picked up because control plane is scaling",
@@ -185,15 +185,15 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
185185
},
186186
UpgradeTracker: func() *scope.UpgradeTracker {
187187
ut := scope.NewUpgradeTracker()
188-
ut.ControlPlane.PendingUpgrade = true
188+
ut.ControlPlane.IsPendingUpgrade = true
189189
ut.ControlPlane.IsScaling = true
190190
return ut
191191
}(),
192192
HookResponseTracker: scope.NewHookResponseTracker(),
193193
},
194194
wantConditionStatus: corev1.ConditionFalse,
195195
wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason,
196-
wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. Control plane is reconciling desired replicas",
196+
wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas",
197197
},
198198
{
199199
name: "should set the condition to false if new version is not picked up because at least one of the machine deployment is upgrading",
@@ -230,15 +230,15 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
230230
},
231231
UpgradeTracker: func() *scope.UpgradeTracker {
232232
ut := scope.NewUpgradeTracker()
233-
ut.ControlPlane.PendingUpgrade = true
233+
ut.ControlPlane.IsPendingUpgrade = true
234234
ut.MachineDeployments.MarkUpgrading("md0-abc123")
235235
return ut
236236
}(),
237237
HookResponseTracker: scope.NewHookResponseTracker(),
238238
},
239239
wantConditionStatus: corev1.ConditionFalse,
240240
wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason,
241-
wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading",
241+
wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading",
242242
},
243243
{
244244
name: "should set the condition to false if control plane picked the new version but machine deployments did not because control plane is upgrading",
@@ -275,7 +275,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
275275
},
276276
UpgradeTracker: func() *scope.UpgradeTracker {
277277
ut := scope.NewUpgradeTracker()
278-
ut.ControlPlane.PendingUpgrade = false
278+
ut.ControlPlane.IsPendingUpgrade = false
279279
ut.ControlPlane.IsUpgrading = true
280280
ut.MachineDeployments.MarkPendingUpgrade("md0-abc123")
281281
return ut
@@ -284,7 +284,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
284284
},
285285
wantConditionStatus: corev1.ConditionFalse,
286286
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason,
287-
wantConditionMessage: "MachineDeployment(s) md0-abc123 upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.22.0",
287+
wantConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.22.0",
288288
},
289289
{
290290
name: "should set the condition to false if control plane picked the new version but machine deployments did not because control plane is scaling",
@@ -321,7 +321,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
321321
},
322322
UpgradeTracker: func() *scope.UpgradeTracker {
323323
ut := scope.NewUpgradeTracker()
324-
ut.ControlPlane.PendingUpgrade = false
324+
ut.ControlPlane.IsPendingUpgrade = false
325325
ut.ControlPlane.IsScaling = true
326326
ut.MachineDeployments.MarkPendingUpgrade("md0-abc123")
327327
return ut
@@ -330,7 +330,39 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
330330
},
331331
wantConditionStatus: corev1.ConditionFalse,
332332
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason,
333-
wantConditionMessage: "MachineDeployment(s) md0-abc123 upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas",
333+
wantConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas",
334+
},
335+
{
336+
name: "should set the condition to false if control plane picked the new version but there are machine deployments pending create because control plane is scaling",
337+
reconcileErr: nil,
338+
cluster: &clusterv1.Cluster{},
339+
s: &scope.Scope{
340+
Blueprint: &scope.ClusterBlueprint{
341+
Topology: &clusterv1.Topology{
342+
Version: "v1.22.0",
343+
},
344+
},
345+
Current: &scope.ClusterState{
346+
Cluster: &clusterv1.Cluster{},
347+
ControlPlane: &scope.ControlPlaneState{
348+
Object: builder.ControlPlane("ns1", "controlplane1").
349+
WithVersion("v1.22.0").
350+
WithReplicas(3).
351+
Build(),
352+
},
353+
},
354+
UpgradeTracker: func() *scope.UpgradeTracker {
355+
ut := scope.NewUpgradeTracker()
356+
ut.ControlPlane.IsPendingUpgrade = false
357+
ut.ControlPlane.IsScaling = true
358+
ut.MachineDeployments.MarkPendingCreate("md0")
359+
return ut
360+
}(),
361+
HookResponseTracker: scope.NewHookResponseTracker(),
362+
},
363+
wantConditionStatus: corev1.ConditionFalse,
364+
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsCreatePendingReason,
365+
wantConditionMessage: "MachineDeployment(s) for Topologies md0 creation on hold. Control plane is reconciling desired replicas",
334366
},
335367
{
336368
name: "should set the condition to true if control plane picked the new version and is upgrading but there are no machine deployments",
@@ -353,7 +385,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
353385
},
354386
UpgradeTracker: func() *scope.UpgradeTracker {
355387
ut := scope.NewUpgradeTracker()
356-
ut.ControlPlane.PendingUpgrade = false
388+
ut.ControlPlane.IsPendingUpgrade = false
357389
ut.ControlPlane.IsUpgrading = true
358390
return ut
359391
}(),
@@ -382,7 +414,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
382414
},
383415
UpgradeTracker: func() *scope.UpgradeTracker {
384416
ut := scope.NewUpgradeTracker()
385-
ut.ControlPlane.PendingUpgrade = false
417+
ut.ControlPlane.IsPendingUpgrade = false
386418
ut.ControlPlane.IsScaling = true
387419
return ut
388420
}(),
@@ -450,7 +482,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
450482
},
451483
UpgradeTracker: func() *scope.UpgradeTracker {
452484
ut := scope.NewUpgradeTracker()
453-
ut.ControlPlane.PendingUpgrade = false
485+
ut.ControlPlane.IsPendingUpgrade = false
454486
ut.MachineDeployments.MarkUpgrading("md0-abc123")
455487
ut.MachineDeployments.MarkPendingUpgrade("md1-abc123")
456488
return ut
@@ -469,7 +501,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
469501
},
470502
wantConditionStatus: corev1.ConditionFalse,
471503
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason,
472-
wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading",
504+
wantConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading",
473505
},
474506
{
475507
name: "should set the condition to false if some machine deployments have not picked the new version because their upgrade has been deferred",
@@ -520,15 +552,15 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
520552
},
521553
UpgradeTracker: func() *scope.UpgradeTracker {
522554
ut := scope.NewUpgradeTracker()
523-
ut.ControlPlane.PendingUpgrade = false
555+
ut.ControlPlane.IsPendingUpgrade = false
524556
ut.MachineDeployments.MarkDeferredUpgrade("md1-abc123")
525557
return ut
526558
}(),
527559
HookResponseTracker: scope.NewHookResponseTracker(),
528560
},
529561
wantConditionStatus: corev1.ConditionFalse,
530562
wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason,
531-
wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 deferred.",
563+
wantConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 deferred.",
532564
},
533565
{
534566
name: "should set the condition to true if there are no reconcile errors and control plane and all machine deployments picked up the new version",
@@ -579,7 +611,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) {
579611
},
580612
UpgradeTracker: func() *scope.UpgradeTracker {
581613
ut := scope.NewUpgradeTracker()
582-
ut.ControlPlane.PendingUpgrade = false
614+
ut.ControlPlane.IsPendingUpgrade = false
583615
return ut
584616
}(),
585617
HookResponseTracker: scope.NewHookResponseTracker(),
@@ -656,7 +688,7 @@ func TestComputeMachineDeploymentNameList(t *testing.T) {
656688
for _, tt := range tests {
657689
t.Run(tt.name, func(t *testing.T) {
658690
g := NewWithT(t)
659-
g.Expect(computeMachineDeploymentNameList(tt.mdList)).To(Equal(tt.expected))
691+
g.Expect(computeNameList(tt.mdList)).To(Equal(tt.expected))
660692
})
661693
}
662694
}

0 commit comments

Comments
 (0)