@@ -260,7 +260,10 @@ func (ca *clusterAccessor) Connect(ctx context.Context) (retErr error) {
260260 defer func () {
261261 if retErr != nil {
262262 log .Error (retErr , "Connect failed" )
263+ connectionUp .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (0 )
263264 ca .lockedState .lastConnectionCreationErrorTimestamp = time .Now ()
265+ } else {
266+ connectionUp .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (1 )
264267 }
265268 }()
266269
@@ -303,15 +306,17 @@ func (ca *clusterAccessor) Connect(ctx context.Context) (retErr error) {
303306// Disconnect disconnects a connection to the workload cluster.
304307func (ca * clusterAccessor ) Disconnect (ctx context.Context ) {
305308 log := ctrl .LoggerFrom (ctx )
306-
307309 if ! ca .Connected (ctx ) {
308310 log .V (6 ).Info ("Skipping disconnect, already disconnected" )
309311 return
310312 }
311313
312314 ca .lock (ctx )
313- defer ca .unlock (ctx )
314315
316+ defer func () {
317+ ca .unlock (ctx )
318+ connectionUp .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (0 )
319+ }()
315320 log .Info ("Disconnecting" )
316321
317322 // Stopping the cache is non-blocking, so it's okay to do it while holding the lock.
@@ -356,14 +361,20 @@ func (ca *clusterAccessor) HealthCheck(ctx context.Context) (bool, bool) {
356361 unauthorizedErrorOccurred = true
357362 ca .lockedState .healthChecking .consecutiveFailures ++
358363 log .V (6 ).Info (fmt .Sprintf ("Health probe failed (unauthorized error occurred): %v" , err ))
364+ healthCheck .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (0 )
365+ healthChecksTotal .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace , "error" ).Inc ()
359366 case err != nil :
360367 ca .lockedState .healthChecking .consecutiveFailures ++
361368 log .V (6 ).Info (fmt .Sprintf ("Health probe failed (%d/%d): %v" ,
362369 ca .lockedState .healthChecking .consecutiveFailures , ca .config .HealthProbe .FailureThreshold , err ))
370+ healthCheck .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (0 )
371+ healthChecksTotal .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace , "error" ).Inc ()
363372 default :
364373 ca .lockedState .healthChecking .consecutiveFailures = 0
365374 ca .lockedState .healthChecking .lastProbeSuccessTimestamp = ca .lockedState .healthChecking .lastProbeTimestamp
366375 log .V (6 ).Info ("Health probe succeeded" )
376+ healthCheck .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (1 )
377+ healthChecksTotal .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace , "success" ).Inc ()
367378 }
368379
369380 tooManyConsecutiveFailures := ca .lockedState .healthChecking .consecutiveFailures >= ca .config .HealthProbe .FailureThreshold
0 commit comments