Skip to content

Commit 5cfb124

Browse files
Make ClusterResourceSet controller more predictable
1 parent 6aff954 commit 5cfb124

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

exp/addons/internal/controllers/clusterresourceset_controller.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,27 @@ func (r *ClusterResourceSetReconciler) Reconcile(ctx context.Context, req ctrl.R
176176

177177
// Return an aggregated error if errors occurred.
178178
if len(errs) > 0 {
179+
// When there is more than one ClusterResourceSet targeting the same cluster,
180+
// there might be conflicts when reconciling those ClusterResourceSets in parallel because they all try to
181+
// patch the same ClusterResourceSetBinding Object.
182+
// In case of patching conflicts we don't want to go into exponential backoff, otherwise it might take an
183+
// arbitrarily long time to get to a stable state due to the backoff delay quickly growing.
184+
// Instead, we are requeuing with an interval to make the system a little bit more predictable (and stabilize tests).
185+
// NOTE: The fact that we rely on conflict errors + requeue to reach the stable state isn't ideal, and
186+
// it might also become an issue at scale.
187+
// e.g. From an empirical observation, it takes 20s for 10 ClusterResourceSet to get to a stable state
188+
// on the same ClusterResourceSetBinding; with fewer ClusterResourceSets the issue is less relevant
189+
// (e.g. with 5 ClusterResourceSet it takes about 4 seconds).
190+
// NOTE: Conflicts happen mostly when the ClusterResourceSetBinding is initialized or when an entry is added for each
191+
// cluster resource set targeting the same cluster.
192+
for _, err := range errs {
193+
if aggregate, ok := err.(kerrors.Aggregate); ok {
194+
if len(aggregate.Errors()) == 1 && apierrors.IsConflict(aggregate.Errors()[0]) {
195+
log.Info("Conflict in patching a ClusterResourceSetBinding that is updated by more than one ClusterResourceSet, requeing")
196+
return ctrl.Result{RequeueAfter: 100 * time.Millisecond}, nil
197+
}
198+
}
199+
}
179200
return ctrl.Result{}, kerrors.NewAggregate(errs)
180201
}
181202

0 commit comments

Comments
 (0)