@@ -260,7 +260,10 @@ func (ca *clusterAccessor) Connect(ctx context.Context) (retErr error) {
260
260
defer func () {
261
261
if retErr != nil {
262
262
log .Error (retErr , "Connect failed" )
263
+ connectionUp .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (0 )
263
264
ca .lockedState .lastConnectionCreationErrorTimestamp = time .Now ()
265
+ } else {
266
+ connectionUp .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (1 )
264
267
}
265
268
}()
266
269
@@ -303,15 +306,17 @@ func (ca *clusterAccessor) Connect(ctx context.Context) (retErr error) {
303
306
// Disconnect disconnects a connection to the workload cluster.
304
307
func (ca * clusterAccessor ) Disconnect (ctx context.Context ) {
305
308
log := ctrl .LoggerFrom (ctx )
306
-
307
309
if ! ca .Connected (ctx ) {
308
310
log .V (6 ).Info ("Skipping disconnect, already disconnected" )
309
311
return
310
312
}
311
313
312
314
ca .lock (ctx )
313
- defer ca .unlock (ctx )
314
315
316
+ defer func () {
317
+ ca .unlock (ctx )
318
+ connectionUp .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (0 )
319
+ }()
315
320
log .Info ("Disconnecting" )
316
321
317
322
// Stopping the cache is non-blocking, so it's okay to do it while holding the lock.
@@ -356,14 +361,20 @@ func (ca *clusterAccessor) HealthCheck(ctx context.Context) (bool, bool) {
356
361
unauthorizedErrorOccurred = true
357
362
ca .lockedState .healthChecking .consecutiveFailures ++
358
363
log .V (6 ).Info (fmt .Sprintf ("Health probe failed (unauthorized error occurred): %v" , err ))
364
+ healthCheck .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (0 )
365
+ healthChecksTotal .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace , "error" ).Inc ()
359
366
case err != nil :
360
367
ca .lockedState .healthChecking .consecutiveFailures ++
361
368
log .V (6 ).Info (fmt .Sprintf ("Health probe failed (%d/%d): %v" ,
362
369
ca .lockedState .healthChecking .consecutiveFailures , ca .config .HealthProbe .FailureThreshold , err ))
370
+ healthCheck .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (0 )
371
+ healthChecksTotal .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace , "error" ).Inc ()
363
372
default :
364
373
ca .lockedState .healthChecking .consecutiveFailures = 0
365
374
ca .lockedState .healthChecking .lastProbeSuccessTimestamp = ca .lockedState .healthChecking .lastProbeTimestamp
366
375
log .V (6 ).Info ("Health probe succeeded" )
376
+ healthCheck .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace ).Set (1 )
377
+ healthChecksTotal .WithLabelValues (ca .cluster .Name , ca .cluster .Namespace , "success" ).Inc ()
367
378
}
368
379
369
380
tooManyConsecutiveFailures := ca .lockedState .healthChecking .consecutiveFailures >= ca .config .HealthProbe .FailureThreshold
0 commit comments