Skip to content

Commit 8ccb67c

Browse files
authored
Merge pull request #8936 from sbueringer/pr-cluster-topology-cached-get
🌱 cluster/topology: use cached Cluster get in Reconcile
2 parents 23e5d42 + abdcd98 commit 8ccb67c

File tree

2 files changed

+21
-7
lines changed

2 files changed

+21
-7
lines changed

internal/controllers/topology/cluster/cluster_controller.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -135,11 +135,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
135135

136136
// Fetch the Cluster instance.
137137
cluster := &clusterv1.Cluster{}
138-
// Use the live client here so that we do not reconcile a stale cluster object.
139-
// Example: If 2 reconcile loops are triggered in quick succession (one from the cluster and the other from the clusterclass)
140-
// the first reconcile loop could update the cluster object (set the infrastructure cluster ref and control plane ref). If we
141-
// do not use the live client the second reconcile loop could potentially pick up the stale cluster object from the cache.
142-
if err := r.APIReader.Get(ctx, req.NamespacedName, cluster); err != nil {
138+
if err := r.Client.Get(ctx, req.NamespacedName, cluster); err != nil {
143139
if apierrors.IsNotFound(err) {
144140
return ctrl.Result{}, nil
145141
}

internal/controllers/topology/cluster/reconcile_state.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,24 @@ func (r *Reconciler) reconcileCluster(ctx context.Context, s *scope.Scope) error
441441
return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: s.Current.Cluster})
442442
}
443443
r.recorder.Eventf(s.Current.Cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q", tlog.KObj{Obj: s.Current.Cluster})
444+
445+
// Wait until Cluster is updated in the cache.
446+
// Note: We have to do this because otherwise using a cached client in the Reconcile func could
447+
// return a stale state of the Cluster we just patched (because the cache might be stale).
448+
// Note: It is good enough to check that the resource version changed. Other controllers might have updated the
449+
// Cluster as well, but the combination of the patch call above without a conflict and a changed resource
450+
// version here guarantees that we see the changes of our own update.
451+
err = wait.PollUntilContextTimeout(ctx, 5*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
452+
key := client.ObjectKey{Namespace: s.Current.Cluster.GetNamespace(), Name: s.Current.Cluster.GetName()}
453+
cachedCluster := &clusterv1.Cluster{}
454+
if err := r.Client.Get(ctx, key, cachedCluster); err != nil {
455+
return false, err
456+
}
457+
return s.Current.Cluster.GetResourceVersion() != cachedCluster.GetResourceVersion(), nil
458+
})
459+
if err != nil {
460+
return errors.Wrapf(err, "failed waiting for Cluster %s to be updated in the cache after patch", tlog.KObj{Obj: s.Current.Cluster})
461+
}
444462
return nil
445463
}
446464

@@ -578,7 +596,7 @@ func (r *Reconciler) createMachineDeployment(ctx context.Context, s *scope.Scope
578596
return true, nil
579597
})
580598
if err != nil {
581-
return errors.Wrapf(err, "failed to create %s: failed waiting for MachineDeployment to be visible in cache", md.Object.Kind)
599+
return errors.Wrapf(err, "failed waiting for MachineDeployment %s to be visible in the cache after create", md.Object.Kind)
582600
}
583601

584602
// If the MachineDeployment has defined a MachineHealthCheck reconcile it.
@@ -667,7 +685,7 @@ func (r *Reconciler) updateMachineDeployment(ctx context.Context, s *scope.Scope
667685
return currentMD.Object.GetResourceVersion() != cachedMD.GetResourceVersion(), nil
668686
})
669687
if err != nil {
670-
return errors.Wrapf(err, "failed to patch %s: failed waiting for MachineDeployment to be updated in cache", tlog.KObj{Obj: currentMD.Object})
688+
return errors.Wrapf(err, "failed waiting for MachineDeployment %s to be updated in the cache after patch", tlog.KObj{Obj: currentMD.Object})
671689
}
672690

673691
// We want to call both cleanup functions even if one of them fails to clean up as much as possible.

0 commit comments

Comments
 (0)