Skip to content

Commit 694c8f2

Browse files
authored
fix: ResourceGroup controller stuck retrying (#1573)
Let the controller-manager handle reconcile retries with shared backoff, instead of trying to handle retrying in the ResourceGroup reconciler. This should also help avoid some conflicts from updating with stale inputs.
1 parent 37a2e21 commit 694c8f2

File tree

2 files changed

+19
-18
lines changed

2 files changed

+19
-18
lines changed

pkg/resourcegroup/controllers/resourcegroup/resourcegroup_controller.go

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2929
"k8s.io/apimachinery/pkg/runtime/schema"
3030
"k8s.io/apimachinery/pkg/types"
31-
"k8s.io/client-go/util/retry"
3231
"kpt.dev/configsync/pkg/api/kpt.dev/v1alpha1"
3332
"kpt.dev/configsync/pkg/reconcilermanager/controllers"
3433
"kpt.dev/configsync/pkg/resourcegroup/controllers/handler"
@@ -153,16 +152,21 @@ func updateResourceMetrics(ctx context.Context, nn types.NamespacedName, statuse
153152
metrics.RecordKCCResourceCount(ctx, nn, int64(kccCount))
154153
}
155154

155+
// updateStatusKptGroup updates the ResourceGroup status.
156+
// To avoid unnecessary updates, it sorts the conditions and checks for a diff.
157+
//
158+
// This method explicitly does not retry on conflict errors, to allow the
159+
// controller manager to handle retry backoff, which it does for all errors,
160+
// not just conflicts. This allows the reconciler to pick up new changes between
161+
// update attempts, instead of getting stuck here retrying endlessly.
156162
func (r *reconciler) updateStatusKptGroup(ctx context.Context, resgroup *v1alpha1.ResourceGroup, newStatus v1alpha1.ResourceGroupStatus) error {
157163
newStatus.Conditions = adjustConditionOrder(newStatus.Conditions)
158-
return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
159-
if apiequality.Semantic.DeepEqual(resgroup.Status, newStatus) {
160-
return nil
161-
}
162-
resgroup.Status = newStatus
163-
// Use `r.Status().Update()` here instead of `r.Update()` to update only resgroup.Status.
164-
return r.client.Status().Update(ctx, resgroup, client.FieldOwner(FieldManager))
165-
})
164+
if apiequality.Semantic.DeepEqual(resgroup.Status, newStatus) {
165+
return nil
166+
}
167+
resgroup.Status = newStatus
168+
// Use `r.Status().Update()` here instead of `r.Update()` to update only resgroup.Status.
169+
return r.client.Status().Update(ctx, resgroup, client.FieldOwner(FieldManager))
166170
}
167171

168172
func (r *reconciler) startReconcilingStatus(status v1alpha1.ResourceGroupStatus) v1alpha1.ResourceGroupStatus {

pkg/resourcegroup/controllers/root/root_controller.go

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
"k8s.io/apimachinery/pkg/runtime/schema"
2626
"k8s.io/apimachinery/pkg/types"
2727
"k8s.io/client-go/rest"
28-
"k8s.io/client-go/util/retry"
2928
"kpt.dev/configsync/pkg/api/kpt.dev/v1alpha1"
3029
"kpt.dev/configsync/pkg/metadata"
3130
"kpt.dev/configsync/pkg/reconcilermanager/controllers"
@@ -151,14 +150,12 @@ func (r *Reconciler) updateWatches(ctx context.Context, gks []schema.GroupKind)
151150
func (r *Reconciler) reconcileDisabledResourceGroup(ctx context.Context, req ctrl.Request, resgroup *v1alpha1.ResourceGroup) (ctrl.Result, error) {
152151
// clean the existing .status field
153152
emptyStatus := v1alpha1.ResourceGroupStatus{}
154-
err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
155-
if apiequality.Semantic.DeepEqual(resgroup.Status, emptyStatus) {
156-
return nil
157-
}
158-
resgroup.Status = emptyStatus
159-
// Use `r.Status().Update()` here instead of `r.Update()` to update only resgroup.Status.
160-
return r.client.Status().Update(ctx, resgroup, client.FieldOwner(resourcegroup.FieldManager))
161-
})
153+
if apiequality.Semantic.DeepEqual(resgroup.Status, emptyStatus) {
154+
return ctrl.Result{}, nil
155+
}
156+
resgroup.Status = emptyStatus
157+
// Use `r.Status().Update()` here instead of `r.Update()` to update only resgroup.Status.
158+
err := r.client.Status().Update(ctx, resgroup, client.FieldOwner(resourcegroup.FieldManager))
162159
if err != nil {
163160
return ctrl.Result{}, err
164161
}

0 commit comments

Comments
 (0)