Skip to content

Commit a595267

Browse files
authored
fix: ResourceGroup unnecessary condition updates (#1571)
- Deprecate the Reconciling condition on the ResourceGroup, because it doesn't actually reflect the aggregate reconcile status of the objects synced by Config Sync from the source of truth. This condition will now always have a status of False and a reason of FinishReconciling to avoid breaking any clients that were waiting for that state. This change reduces churn on the ResourceGroup objects, which had been frequently flipping back and forth between a status of True and False. To learn how to wait for all the managed objects to be synced and reconciled, see https://cloud.google.com/kubernetes-engine/enterprise/config-sync/docs/how-to/monitor-rootsync-reposync#confirm_resources_in_the_commit_are_reconciled - This change also removes the case that was setting the Reconciling condition reason to ExceedTimeout, which is an implementation detail of the ResourceGroup controller, has no value to the user and doesn't need to be exposed. - Docs change: cl/731842416 Fixes: b/399705420
1 parent 5109937 commit a595267

File tree

1 file changed

+18
-26
lines changed

1 file changed

+18
-26
lines changed

pkg/resourcegroup/controllers/resourcegroup/resourcegroup_controller.go

Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -99,16 +99,17 @@ func (r *reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
9999
return ctrl.Result{}, nil
100100
}
101101

102-
newStatus := r.startReconcilingStatus(rgObj.Status)
103-
if err := r.updateStatusKptGroup(ctx, rgObj, newStatus); err != nil {
104-
r.Logger(ctx).Error(err, "failed to update ResourceGroup to start reconciling")
105-
return ctrl.Result{Requeue: true}, err
106-
}
102+
// Since this controller doesn't perform any writes in any external systems,
103+
// it doesn't need to set the Reconciling condition status to True before
104+
// building the new status, because there are no possible side-effects that
105+
// the user might need to know about when reconciling fails.
106+
// So skip initializing the Reconciling & Stalled conditions and only set
107+
// them when reconciling succeeds or fails.
107108

108109
id := getInventoryID(rgObj.Labels)
109-
newStatus = r.endReconcilingStatus(ctx, id, req.NamespacedName, rgObj.Spec, rgObj.Status, rgObj.Generation)
110+
newStatus := r.endReconcilingStatus(ctx, id, req.NamespacedName, rgObj.Spec, rgObj.Status, rgObj.Generation)
110111
if err := r.updateStatusKptGroup(ctx, rgObj, newStatus); err != nil {
111-
r.Logger(ctx).Error(err, "failed to update ResourceGroup to finish reconciling")
112+
r.Logger(ctx).Error(err, "failed to update ResourceGroup")
112113
return ctrl.Result{Requeue: true}, err
113114
}
114115

@@ -169,19 +170,6 @@ func (r *reconciler) updateStatusKptGroup(ctx context.Context, resgroup *v1alpha
169170
return r.client.Status().Update(ctx, resgroup, client.FieldOwner(FieldManager))
170171
}
171172

172-
func (r *reconciler) startReconcilingStatus(status v1alpha1.ResourceGroupStatus) v1alpha1.ResourceGroupStatus {
173-
// Preserve existing status and only update fields that need to change.
174-
// This avoids repeatedly updating the status just to update a timestamp.
175-
// It also prevents fighting with other controllers updating the status.
176-
newStatus := *status.DeepCopy()
177-
// Update conditions, if they've changed
178-
newStatus.Conditions = updateCondition(newStatus.Conditions,
179-
newReconcilingCondition(v1alpha1.TrueConditionStatus, StartReconciling, startReconcilingMsg))
180-
newStatus.Conditions = updateCondition(newStatus.Conditions,
181-
newStalledCondition(v1alpha1.FalseConditionStatus, "", ""))
182-
return newStatus
183-
}
184-
185173
func (r *reconciler) endReconcilingStatus(
186174
ctx context.Context,
187175
id string,
@@ -205,20 +193,24 @@ func (r *reconciler) endReconcilingStatus(
205193
}()
206194
select {
207195
case <-finish:
196+
// Updating the ObservedGeneration should be a no-op, because the
197+
// applier should have updated it already.
208198
newStatus.ObservedGeneration = generation
209-
// Update conditions, if they've changed
210-
newStatus.Conditions = updateCondition(newStatus.Conditions,
211-
newReconcilingCondition(v1alpha1.FalseConditionStatus, FinishReconciling, finishReconcilingMsg))
212199
newStatus.Conditions = updateCondition(newStatus.Conditions,
213200
aggregateResourceStatuses(newStatus.ResourceStatuses))
214201
case <-time.After(reconcileTimeout):
215-
// Update conditions, if they've changed
216-
newStatus.Conditions = updateCondition(newStatus.Conditions,
217-
newReconcilingCondition(v1alpha1.FalseConditionStatus, ExceedTimeout, exceedTimeoutMsg))
218202
newStatus.Conditions = updateCondition(newStatus.Conditions,
219203
newStalledCondition(v1alpha1.TrueConditionStatus, ExceedTimeout, exceedTimeoutMsg))
220204
}
221205

206+
// Previously, the Reconciling condition toggled between True & False while
207+
// this controller was reconciling, but this caused unnecessary churn and
208+
// confusion, because it didn't reflect the reconciliation status of the
209+
// managed objects in status.resourceStatuses. So now it's always set to
210+
// False, for reverse compatibility, in case any clients were waiting it.
211+
newStatus.Conditions = updateCondition(newStatus.Conditions,
212+
newReconcilingCondition(v1alpha1.FalseConditionStatus, FinishReconciling, finishReconcilingMsg))
213+
222214
metrics.RecordReconcileDuration(ctx, newStatus.Conditions[1].Reason, startTime)
223215
updateResourceMetrics(ctx, namespacedName, newStatus.ResourceStatuses)
224216
return newStatus

0 commit comments

Comments
 (0)