
Commit 566db6f

Authored by naimadswdn (Damian Seredyn) and drivebyer
fix: Reload RedisCluster instance after status updates conflict (OT-CONTAINER-KIT#1583)
* fix: Reload RedisCluster instance after status updates

  Fix a race condition where status checks used stale instance data, preventing health checks from running. Reload the instance after each status update to ensure subsequent checks have fresh status values. This fixes cluster state reporting issues where the ReadyLeaderReplicas and ReadyFollowerReplicas checks always failed due to stale data.

  Signed-off-by: Damian Seredyn <[email protected]>

* retry on conflict

  Signed-off-by: drivebyer <[email protected]>

* update concurrent

  Signed-off-by: drivebyer <[email protected]>

---------

Signed-off-by: Damian Seredyn <[email protected]>
Signed-off-by: drivebyer <[email protected]>
Co-authored-by: Damian Seredyn <[email protected]>
Co-authored-by: drivebyer <[email protected]>
1 parent 66eaa2d commit 566db6f
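
The core of the fix, per the commit message, is to reload the RedisCluster object after every status write so that checks later in the same reconcile pass see the persisted values instead of a stale in-memory copy. A minimal sketch of that pattern with a generic controller-runtime client follows; refreshAfterStatusUpdate and the use of a Deployment in place of the RedisCluster CRD are illustrative assumptions, not code from this repository.

```go
package statusdemo

import (
	"context"

	appsv1 "k8s.io/api/apps/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// refreshAfterStatusUpdate writes the status subresource and then re-reads the
// object into the same pointer, so later checks in the reconcile loop observe the
// freshly persisted status (and new resourceVersion) rather than stale data.
func refreshAfterStatusUpdate(ctx context.Context, c client.Client, obj *appsv1.Deployment) error {
	if err := c.Status().Update(ctx, obj); err != nil {
		return err
	}
	// Reload into the same pointer so callers holding obj see fresh data.
	return c.Get(ctx, client.ObjectKeyFromObject(obj), obj)
}
```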

File tree

  .github/workflows/ci.yaml
  internal/controller/rediscluster/rediscluster_controller.go

2 files changed (+20, -3 lines)

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion

@@ -260,7 +260,7 @@ jobs:
       - name: Install Redis Operator
         run: |
           make deploy IMG=redis-operator:e2e
-          kubectl set env deployment/redis-operator-redis-operator -n redis-operator-system OPERATOR_IMAGE=redis-operator:e2e FEATURE_GATES=GenerateConfigInInitContainer=true
+          kubectl set env deployment/redis-operator-redis-operator -n redis-operator-system OPERATOR_IMAGE=redis-operator:e2e FEATURE_GATES=GenerateConfigInInitContainer=true MAX_CONCURRENT_RECONCILES=10

       - name: Wait for Redis Operator to be ready
         run: |
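
The workflow change only exports MAX_CONCURRENT_RECONCILES=10 to the e2e deployment; how the operator consumes it is not part of this diff. A rough sketch of the usual wiring, assuming a standard controller-runtime builder (maxConcurrentReconciles, setupController, the Deployment type, and the no-op reconciler are stand-ins, not this operator's real code):

```go
package setup

import (
	"context"
	"os"
	"strconv"

	appsv1 "k8s.io/api/apps/v1"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// maxConcurrentReconciles parses MAX_CONCURRENT_RECONCILES and falls back to 1,
// controller-runtime's default, when the variable is unset or not a positive integer.
func maxConcurrentReconciles() int {
	if v, err := strconv.Atoi(os.Getenv("MAX_CONCURRENT_RECONCILES")); err == nil && v > 0 {
		return v
	}
	return 1
}

// setupController applies the value via controller.Options on the builder.
func setupController(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&appsv1.Deployment{}).
		WithOptions(controller.Options{MaxConcurrentReconciles: maxConcurrentReconciles()}).
		Complete(reconcile.Func(func(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
			return reconcile.Result{}, nil
		}))
}
```

Running 10 workers in the e2e job presumably makes the status-update conflicts handled in the controller change below easier to reproduce.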

internal/controller/rediscluster/rediscluster_controller.go

Lines changed: 19 additions & 2 deletions

@@ -32,6 +32,7 @@ import (
 	retry "github.com/avast/retry-go"
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/record"
 	ctrl "sigs.k8s.io/controller-runtime"
@@ -300,7 +301,8 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
 	}

 	// Mark the cluster status as ready if all the leader and follower nodes are ready
-	if instance.Status.ReadyLeaderReplicas == leaderReplicas && instance.Status.ReadyFollowerReplicas == followerReplicas {
+	// and the cluster is not already in Ready state (to avoid unnecessary status updates)
+	if instance.Status.ReadyLeaderReplicas == leaderReplicas && instance.Status.ReadyFollowerReplicas == followerReplicas && instance.Status.State != rcvb2.RedisClusterReady {
 		monitoring.RedisClusterHealthy.WithLabelValues(instance.Namespace, instance.Name).Set(0)
 		if k8sutils.RedisClusterStatusHealth(ctx, r.K8sClient, instance) {
 			monitoring.RedisClusterHealthy.WithLabelValues(instance.Namespace, instance.Name).Set(1)
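
The extra State != RedisClusterReady condition keeps the reconciler from rewriting an already-Ready status on every pass, per the comment added in the hunk. The same idea in generic form, writing status only when it actually differs (a sketch using apimachinery's semantic equality and a Deployment in place of the RedisCluster type; not code from this repository):

```go
package statusdemo

import (
	"context"

	appsv1 "k8s.io/api/apps/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// updateStatusIfChanged skips the API call when the stored status already matches
// the desired one, avoiding needless writes and the extra reconciles they trigger.
func updateStatusIfChanged(ctx context.Context, c client.Client, obj *appsv1.Deployment, desired appsv1.DeploymentStatus) error {
	if apiequality.Semantic.DeepEqual(obj.Status, desired) {
		return nil
	}
	obj.Status = desired
	return c.Status().Update(ctx, obj)
}
```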
@@ -339,7 +341,22 @@ func (r *Reconciler) updateStatus(ctx context.Context, rc *rcvb2.RedisCluster, s
 	copy := rc.DeepCopy()
 	copy.Spec = rcvb2.RedisClusterSpec{}
 	copy.Status = status
-	return common.UpdateStatus(ctx, r.Client, copy)
+	err := common.UpdateStatus(ctx, r.Client, copy)
+	if err != nil && apierrors.IsConflict(err) {
+		log.FromContext(ctx).Info("conflict detected, reloading instance and retrying status update")
+		namespacedName := client.ObjectKey{
+			Namespace: rc.Namespace,
+			Name:      rc.Name,
+		}
+		if err := r.Get(ctx, namespacedName, rc); err != nil {
+			return err
+		}
+		copy = rc.DeepCopy()
+		copy.Spec = rcvb2.RedisClusterSpec{}
+		copy.Status = status
+		return common.UpdateStatus(ctx, r.Client, copy)
+	}
+	return nil
 }

 // SetupWithManager sets up the controller with the Manager.
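
The updateStatus change above handles exactly one conflict: on IsConflict it re-Gets the object and retries the write once. client-go also ships a canned helper for the same pattern with backoff; a minimal sketch (using k8s.io/client-go/util/retry and a plain controller-runtime client with a Deployment stand-in, rather than this repo's common.UpdateStatus) looks like:

```go
package statusdemo

import (
	"context"

	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/client-go/util/retry"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// updateStatusWithRetry re-reads the object and reapplies the desired status on
// every Conflict error, using client-go's default backoff.
func updateStatusWithRetry(ctx context.Context, c client.Client, key client.ObjectKey, desired appsv1.DeploymentStatus) error {
	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
		obj := &appsv1.Deployment{}
		if err := c.Get(ctx, key, obj); err != nil {
			return err
		}
		obj.Status = desired
		return c.Status().Update(ctx, obj)
	})
}
```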
