kubernetes-sigs
diff --git a/controllers/clustercache/cluster_accessor.go b/controllers/clustercache/cluster_accessor.go
Lines changed: 28 additions & 27 deletions
diff --git a/controllers/clustercache/cluster_accessor_test.go b/controllers/clustercache/cluster_accessor_test.go
Lines changed: 13 additions & 8 deletions
diff --git a/controllers/clustercache/cluster_cache.go b/controllers/clustercache/cluster_cache.go
Lines changed: 28 additions & 14 deletions
diff --git a/controllers/clustercache/cluster_cache_fake.go b/controllers/clustercache/cluster_cache_fake.go
Lines changed: 3 additions & 3 deletions
@@ -51,7 +51,7 @@ type clusterAccessor struct {
 	lockedStateLock sync.RWMutex
 
 	// lockedState is the state of the clusterAccessor. This includes the connection (e.g. client, cache)
-	// and health checking information (e.g. lastProbeSuccessTimestamp, consecutiveFailures).
+	// and health checking information (e.g. lastProbeSuccessTime, consecutiveFailures).
 	// lockedStateLock must be *always* held (via lock or rLock) before accessing this field.
 	lockedState clusterAccessorLockedState
 
@@ -151,11 +151,11 @@ type clusterAccessorHealthProbeConfig struct {
 }
 
 // clusterAccessorLockedState is the state of the clusterAccessor. This includes the connection (e.g. client, cache)
-// and health checking information (e.g. lastProbeSuccessTimestamp, consecutiveFailures).
+// and health checking information (e.g. lastProbeSuccessTime, consecutiveFailures).
 // lockedStateLock must be *always* held (via lock or rLock) before accessing this field.
 type clusterAccessorLockedState struct {
-	// lastConnectionCreationErrorTimestamp is the timestamp when connection creation failed the last time.
-	lastConnectionCreationErrorTimestamp time.Time
+	// lastConnectionCreationErrorTime is the time when connection creation failed the last time.
+	lastConnectionCreationErrorTime time.Time
 
 	// connection holds the connection state (e.g. client, cache) of the clusterAccessor.
 	connection *clusterAccessorLockedConnectionState
@@ -167,7 +167,7 @@ type clusterAccessorLockedState struct {
 	// private key in every single Reconcile.
 	clientCertificatePrivateKey *rsa.PrivateKey
 
-	// healthChecking holds the health checking state (e.g. lastProbeSuccessTimestamp, consecutiveFailures)
+	// healthChecking holds the health checking state (e.g. lastProbeSuccessTime, consecutiveFailures)
 	// of the clusterAccessor.
 	healthChecking clusterAccessorLockedHealthCheckingState
 }
@@ -201,14 +201,14 @@ type clusterAccessorLockedConnectionState struct {
 	watches sets.Set[string]
 }
 
-// clusterAccessorLockedHealthCheckingState holds the health checking state (e.g. lastProbeSuccessTimestamp,
+// clusterAccessorLockedHealthCheckingState holds the health checking state (e.g. lastProbeSuccessTime,
 // consecutiveFailures) of the clusterAccessor.
 type clusterAccessorLockedHealthCheckingState struct {
-	// lastProbeTimestamp is the time when the health probe was executed last.
-	lastProbeTimestamp time.Time
+	// lastProbeTime is the time when the health probe was executed last.
+	lastProbeTime time.Time
 
-	// lastProbeSuccessTimestamp is the time when the health probe was successfully executed last.
-	lastProbeSuccessTimestamp time.Time
+	// lastProbeSuccessTime is the time when the health probe was successfully executed last.
+	lastProbeSuccessTime time.Time
 
 	// consecutiveFailures is the number of consecutive health probe failures.
 	consecutiveFailures int
@@ -261,7 +261,11 @@ func (ca *clusterAccessor) Connect(ctx context.Context) (retErr error) {
 		if retErr != nil {
 			log.Error(retErr, "Connect failed")
 			connectionUp.WithLabelValues(ca.cluster.Name, ca.cluster.Namespace).Set(0)
-			ca.lockedState.lastConnectionCreationErrorTimestamp = time.Now()
+			ca.lockedState.lastConnectionCreationErrorTime = time.Now()
+			// A client creation just failed, so let's count this as a failed probe.
+			ca.lockedState.healthChecking.lastProbeTime = time.Now()
+			// Note: Intentionally not modifying lastProbeSuccessTime.
+			ca.lockedState.healthChecking.consecutiveFailures++
 		} else {
 			connectionUp.WithLabelValues(ca.cluster.Name, ca.cluster.Namespace).Set(1)
 		}
@@ -288,9 +292,9 @@ func (ca *clusterAccessor) Connect(ctx context.Context) (retErr error) {
 	now := time.Now()
 	ca.lockedState.healthChecking = clusterAccessorLockedHealthCheckingState{
 		// A client was just created successfully, so let's set the last probe times.
-		lastProbeTimestamp:        now,
-		lastProbeSuccessTimestamp: now,
-		consecutiveFailures:       0,
+		lastProbeTime:        now,
+		lastProbeSuccessTime: now,
+		consecutiveFailures:  0,
 	}
 	ca.lockedState.connection = &clusterAccessorLockedConnectionState{
 		restConfig:   connection.RESTConfig,
@@ -350,7 +354,7 @@ func (ca *clusterAccessor) HealthCheck(ctx context.Context) (bool, bool) {
 	ca.lock(ctx)
 	defer ca.unlock(ctx)
 
-	ca.lockedState.healthChecking.lastProbeTimestamp = time.Now()
+	ca.lockedState.healthChecking.lastProbeTime = time.Now()
 
 	unauthorizedErrorOccurred := false
 	switch {
@@ -371,7 +375,7 @@ func (ca *clusterAccessor) HealthCheck(ctx context.Context) (bool, bool) {
 		healthChecksTotal.WithLabelValues(ca.cluster.Name, ca.cluster.Namespace, "error").Inc()
 	default:
 		ca.lockedState.healthChecking.consecutiveFailures = 0
-		ca.lockedState.healthChecking.lastProbeSuccessTimestamp = ca.lockedState.healthChecking.lastProbeTimestamp
+		ca.lockedState.healthChecking.lastProbeSuccessTime = ca.lockedState.healthChecking.lastProbeTime
 		log.V(6).Info("Health probe succeeded")
 		healthCheck.WithLabelValues(ca.cluster.Name, ca.cluster.Namespace).Set(1)
 		healthChecksTotal.WithLabelValues(ca.cluster.Name, ca.cluster.Namespace, "success").Inc()
@@ -462,25 +466,22 @@ func (ca *clusterAccessor) Watch(ctx context.Context, watcher Watcher) error {
 	return nil
 }
 
-func (ca *clusterAccessor) GetLastProbeSuccessTimestamp(ctx context.Context) time.Time {
+func (ca *clusterAccessor) GetHealthCheckingState(ctx context.Context) HealthCheckingState {
 	ca.rLock(ctx)
 	defer ca.rUnlock(ctx)
 
-	return ca.lockedState.healthChecking.lastProbeSuccessTimestamp
-}
-
-func (ca *clusterAccessor) GetLastProbeTimestamp(ctx context.Context) time.Time {
-	ca.rLock(ctx)
-	defer ca.rUnlock(ctx)
-
-	return ca.lockedState.healthChecking.lastProbeTimestamp
+	return HealthCheckingState{
+		LastProbeTime:        ca.lockedState.healthChecking.lastProbeTime,
+		LastProbeSuccessTime: ca.lockedState.healthChecking.lastProbeSuccessTime,
+		ConsecutiveFailures:  ca.lockedState.healthChecking.consecutiveFailures,
+	}
 }
 
-func (ca *clusterAccessor) GetLastConnectionCreationErrorTimestamp(ctx context.Context) time.Time {
+func (ca *clusterAccessor) GetLastConnectionCreationErrorTime(ctx context.Context) time.Time {
 	ca.rLock(ctx)
 	defer ca.rUnlock(ctx)
 
-	return ca.lockedState.lastConnectionCreationErrorTimestamp
+	return ca.lockedState.lastConnectionCreationErrorTime
 }
 
 func (ca *clusterAccessor) rLock(ctx context.Context) {
 
@@ -81,9 +81,14 @@ func TestConnect(t *testing.T) {
 	g.Expect(err).To(HaveOccurred())
 	g.Expect(err.Error()).To(Equal("error creating REST config: error getting kubeconfig secret: Secret \"test-cluster-kubeconfig\" not found"))
 	g.Expect(accessor.Connected(ctx)).To(BeFalse())
-	g.Expect(accessor.lockedState.lastConnectionCreationErrorTimestamp.IsZero()).To(BeFalse())
-	accessor.lockedState.lastConnectionCreationErrorTimestamp = time.Time{} // so we can compare in the next line
-	g.Expect(accessor.lockedState).To(Equal(clusterAccessorLockedState{}))
+	g.Expect(accessor.lockedState.lastConnectionCreationErrorTime.IsZero()).To(BeFalse())
+	g.Expect(accessor.lockedState).To(Equal(clusterAccessorLockedState{
+		lastConnectionCreationErrorTime: accessor.lockedState.lastConnectionCreationErrorTime,
+		healthChecking: clusterAccessorLockedHealthCheckingState{
+			lastProbeTime:       accessor.lockedState.healthChecking.lastProbeTime,
+			consecutiveFailures: 1,
+		},
+	}))
 
 	// Create invalid kubeconfig Secret
 	kubeconfigBytes := kubeconfig.FromEnvTestConfig(env.Config, testCluster)
@@ -100,7 +105,7 @@ func TestConnect(t *testing.T) {
 	g.Expect(err).To(HaveOccurred())
 	g.Expect(err.Error()).To(Equal("error creating HTTP client and mapper: cluster is not reachable: the server could not find the requested resource"))
 	g.Expect(accessor.Connected(ctx)).To(BeFalse())
-	g.Expect(accessor.lockedState.lastConnectionCreationErrorTimestamp.IsZero()).To(BeFalse())
+	g.Expect(accessor.lockedState.lastConnectionCreationErrorTime.IsZero()).To(BeFalse())
 
 	// Cleanup invalid kubeconfig Secret
 	g.Expect(env.CleanupAndWait(ctx, kubeconfigSecret)).To(Succeed())
@@ -127,8 +132,8 @@ func TestConnect(t *testing.T) {
 
 	g.Expect(accessor.lockedState.clientCertificatePrivateKey).ToNot(BeNil())
 
-	g.Expect(accessor.lockedState.healthChecking.lastProbeTimestamp.IsZero()).To(BeFalse())
-	g.Expect(accessor.lockedState.healthChecking.lastProbeSuccessTimestamp.IsZero()).To(BeFalse())
+	g.Expect(accessor.lockedState.healthChecking.lastProbeTime.IsZero()).To(BeFalse())
+	g.Expect(accessor.lockedState.healthChecking.lastProbeSuccessTime.IsZero()).To(BeFalse())
 	g.Expect(accessor.lockedState.healthChecking.consecutiveFailures).To(Equal(0))
 
 	// Get client and test Get & List
@@ -201,8 +206,8 @@ func TestDisconnect(t *testing.T) {
 	// Verify health checking state was preserved
 	g.Expect(accessor.lockedState.clientCertificatePrivateKey).ToNot(BeNil())
 
-	g.Expect(accessor.lockedState.healthChecking.lastProbeTimestamp.IsZero()).To(BeFalse())
-	g.Expect(accessor.lockedState.healthChecking.lastProbeSuccessTimestamp.IsZero()).To(BeFalse())
+	g.Expect(accessor.lockedState.healthChecking.lastProbeTime.IsZero()).To(BeFalse())
+	g.Expect(accessor.lockedState.healthChecking.lastProbeSuccessTime.IsZero()).To(BeFalse())
 }
 
 func TestHealthCheck(t *testing.T) {
 
@@ -152,8 +152,8 @@ type ClusterCache interface {
 	// If there is no connection to the workload cluster ErrClusterNotConnected will be returned.
 	Watch(ctx context.Context, cluster client.ObjectKey, watcher Watcher) error
 
-	// GetLastProbeSuccessTimestamp returns the time when the health probe was successfully executed last.
-	GetLastProbeSuccessTimestamp(ctx context.Context, cluster client.ObjectKey) time.Time
+	// GetHealthCheckingState returns the health checking state of a Cluster.
+	GetHealthCheckingState(ctx context.Context, cluster client.ObjectKey) HealthCheckingState
 
 	// GetClusterSource returns a Source of Cluster events.
 	// The mapFunc will be used to map from Cluster to reconcile.Request.
@@ -166,6 +166,21 @@ type ClusterCache interface {
 	GetClusterSource(controllerName string, mapFunc func(ctx context.Context, cluster client.Object) []ctrl.Request, opts ...GetClusterSourceOption) source.Source
 }
 
+// HealthCheckingState holds the health checking state for a Cluster.
+type HealthCheckingState struct {
+	// LastProbeTime is the time when a health probe was executed last.
+	// Note: client creations are also counted as probes.
+	LastProbeTime time.Time
+
+	// LastProbeSuccessTime is the time when a health probe was successfully executed last.
+	// Note: client creations are also counted as probes.
+	LastProbeSuccessTime time.Time
+
+	// ConsecutiveFailures is the number of consecutive health probe failures.
+	// Note: client creations are also counted as probes.
+	ConsecutiveFailures int
+}
+
 // ErrClusterNotConnected is returned by the ClusterCache when e.g. a Client cannot be returned
 // because there is no connection to the workload cluster.
 var ErrClusterNotConnected = errors.New("connection to the workload cluster is down")
@@ -262,7 +277,7 @@ func (o *GetClusterSourceOptions) ApplyOptions(opts []GetClusterSourceOption) *G
 
 // WatchForProbeFailure will configure the Cluster source to enqueue reconcile.Requests if the health probe
 // didn't succeed for the configured duration.
-// For example if WatchForProbeFailure is set to 5m, an event will be sent if LastProbeSuccessTimestamp
+// For example if WatchForProbeFailure is set to 5m, an event will be sent if LastProbeSuccessTime
 // is 5m in the past (i.e. health probes didn't succeed in the last 5m).
 type WatchForProbeFailure time.Duration
 
@@ -353,11 +368,11 @@ type clusterSource struct {
 	// ch is the channel on which to send events.
 	ch chan event.GenericEvent
 
-	// sendEventAfterProbeFailureDurations are the durations after LastProbeSuccessTimestamp
+	// sendEventAfterProbeFailureDurations are the durations after LastProbeSuccessTime
 	// after which we have to send events.
 	sendEventAfterProbeFailureDurations []time.Duration
 
-	// lastEventSentTimeByCluster are the timestamps when we last sent an event for a cluster.
+	// lastEventSentTimeByCluster are the times when we last sent an event for a cluster.
 	lastEventSentTimeByCluster map[client.ObjectKey]time.Time
 }
 
@@ -401,12 +416,12 @@ func (cc *clusterCache) Watch(ctx context.Context, cluster client.ObjectKey, wat
 	return accessor.Watch(ctx, watcher)
 }
 
-func (cc *clusterCache) GetLastProbeSuccessTimestamp(ctx context.Context, cluster client.ObjectKey) time.Time {
+func (cc *clusterCache) GetHealthCheckingState(ctx context.Context, cluster client.ObjectKey) HealthCheckingState {
 	accessor := cc.getClusterAccessor(cluster)
 	if accessor == nil {
-		return time.Time{}
+		return HealthCheckingState{}
 	}
-	return accessor.GetLastProbeSuccessTimestamp(ctx)
+	return accessor.GetHealthCheckingState(ctx)
 }
 
 const (
@@ -452,10 +467,10 @@ func (cc *clusterCache) Reconcile(ctx context.Context, req reconcile.Request) (r
 	// Try to connect, if not connected.
 	connected := accessor.Connected(ctx)
 	if !connected {
-		lastConnectionCreationErrorTimestamp := accessor.GetLastConnectionCreationErrorTimestamp(ctx)
+		lastConnectionCreationErrorTime := accessor.GetLastConnectionCreationErrorTime(ctx)
 
 		// Requeue, if connection creation failed within the ConnectionCreationRetryInterval.
-		if requeueAfter, requeue := shouldRequeue(time.Now(), lastConnectionCreationErrorTimestamp, accessor.config.ConnectionCreationRetryInterval); requeue {
+		if requeueAfter, requeue := shouldRequeue(time.Now(), lastConnectionCreationErrorTime, accessor.config.ConnectionCreationRetryInterval); requeue {
 			log.V(6).Info(fmt.Sprintf("Requeuing after %s as connection creation already failed within the last %s",
 				requeueAfter.Truncate(time.Second/10), accessor.config.ConnectionCreationRetryInterval))
 			requeueAfterDurations = append(requeueAfterDurations, requeueAfter)
@@ -475,10 +490,10 @@ func (cc *clusterCache) Reconcile(ctx context.Context, req reconcile.Request) (r
 
 	// Run the health probe, if connected.
 	if connected {
-		lastProbeTimestamp := accessor.GetLastProbeTimestamp(ctx)
+		healthCheckingState := accessor.GetHealthCheckingState(ctx)
 
 		// Requeue, if health probe was already run within the HealthProbe.Interval.
-		if requeueAfter, requeue := shouldRequeue(time.Now(), lastProbeTimestamp, accessor.config.HealthProbe.Interval); requeue {
+		if requeueAfter, requeue := shouldRequeue(time.Now(), healthCheckingState.LastProbeTime, accessor.config.HealthProbe.Interval); requeue {
 			log.V(6).Info(fmt.Sprintf("Requeuing after %s as health probe was already run within the last %s",
 				requeueAfter.Truncate(time.Second/10), accessor.config.HealthProbe.Interval))
 			requeueAfterDurations = append(requeueAfterDurations, requeueAfter)
@@ -515,8 +530,7 @@ func (cc *clusterCache) Reconcile(ctx context.Context, req reconcile.Request) (r
 	}
 
 	// Send events to cluster sources.
-	lastProbeSuccessTime := accessor.GetLastProbeSuccessTimestamp(ctx)
-	cc.sendEventsToClusterSources(ctx, cluster, time.Now(), lastProbeSuccessTime, didConnect, didDisconnect)
+	cc.sendEventsToClusterSources(ctx, cluster, time.Now(), accessor.GetHealthCheckingState(ctx).LastProbeSuccessTime, didConnect, didDisconnect)
 
 	// Requeue based on requeueAfterDurations (fallback to defaultRequeueAfter).
 	return reconcile.Result{RequeueAfter: minDurationOrDefault(requeueAfterDurations, defaultRequeueAfter)}, nil
 
@@ -36,9 +36,9 @@ func NewFakeClusterCache(workloadClient client.Client, clusterKey client.ObjectK
 				watches:      sets.Set[string]{}.Insert(watchObjects...),
 			},
 			healthChecking: clusterAccessorLockedHealthCheckingState{
-				lastProbeTimestamp:        time.Now(),
-				lastProbeSuccessTimestamp: time.Now(),
-				consecutiveFailures:       0,
+				lastProbeTime:        time.Now(),
+				lastProbeSuccessTime: time.Now(),
+				consecutiveFailures:  0,
 			},
 		},
 	}