Skip to content

Commit 5c5e8de

Browse files
author
Christoph Bühler
committed
feat(operations): add healthchecks and metrics.
This adds a Prometheus metrics endpoint (configurable via the operator settings) that collects information about the running system. Health checks (divided into two categories, "readiness" and "liveness") are also added so you can check on your system. This closes #4. Signed-off-by: Christoph Bühler <[email protected]>
1 parent 907bd75 commit 5c5e8de

17 files changed

+564
-19
lines changed

src/KubeOps/KubeOps.csproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@
2828
<PackageReference Include="KubernetesClient" Version="2.0.29" />
2929
<PackageReference Include="McMaster.Extensions.CommandLineUtils" Version="3.0.0" />
3030
<PackageReference Include="McMaster.Extensions.Hosting.CommandLine" Version="3.0.0" />
31+
<PackageReference Include="Microsoft.Extensions.Diagnostics.HealthChecks" Version="3.1.7" />
3132
<PackageReference Include="Namotion.Reflection" Version="1.0.12" />
33+
<PackageReference Include="prometheus-net.AspNetCore" Version="3.6.0" />
34+
<PackageReference Include="prometheus-net.AspNetCore.HealthChecks" Version="3.6.0" />
3235
<PackageReference Include="YamlDotNet" Version="8.1.2" />
3336
</ItemGroup>
3437

src/KubeOps/Operator/Caching/ResourceCache.cs

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
using System.Collections.Generic;
1+
using System.Collections.Concurrent;
2+
using System.Collections.Generic;
23
using System.Linq;
34
using k8s;
45
using k8s.Models;
56
using KellermanSoftware.CompareNetObjects;
6-
using KubeOps.Operator.Entities;
7+
using KubeOps.Operator.DevOps;
78
using KubeOps.Operator.Entities.Extensions;
89

910
namespace KubeOps.Operator.Caching
@@ -23,7 +24,9 @@ internal class ResourceCache<TEntity> : IResourceCache<TEntity>
2324
MembersToIgnore = new List<string> { ResourceVersion },
2425
});
2526

26-
private readonly IDictionary<string, TEntity> _cache = new Dictionary<string, TEntity>();
27+
private readonly IDictionary<string, TEntity> _cache = new ConcurrentDictionary<string, TEntity>();
28+
29+
private readonly ResourceCacheMetrics<TEntity> _metrics = new ResourceCacheMetrics<TEntity>();
2730

2831
public TEntity Get(string id) => _cache[id];
2932

@@ -40,6 +43,8 @@ public TEntity Upsert(TEntity resource, out CacheComparisonResult result)
4043
_cache[resource.Metadata.Uid] = resource.DeepClone();
4144
}
4245

46+
_metrics.CachedItemsSize.Set(_cache.Count);
47+
_metrics.CachedItemsSummary.Observe(_cache.Count);
4348
return resource;
4449
}
4550

@@ -72,12 +77,22 @@ private CacheComparisonResult CompareCache(TEntity resource)
7277

7378
public void Remove(TEntity resource) => Remove(resource.Metadata.Uid);
7479

75-
public void Clear() => _cache.Clear();
80+
public void Clear()
81+
{
82+
_cache.Clear();
83+
_metrics.CachedItemsSize.Set(_cache.Count);
84+
_metrics.CachedItemsSummary.Observe(_cache.Count);
85+
}
7686

7787
private bool Exists(TEntity resource) => _cache.ContainsKey(resource.Metadata.Uid);
7888

7989
private bool Exists(string id) => _cache.ContainsKey(id);
8090

81-
private void Remove(string resourceUid) => _cache.Remove(resourceUid);
91+
private void Remove(string resourceUid)
92+
{
93+
_cache.Remove(resourceUid);
94+
_metrics.CachedItemsSize.Set(_cache.Count);
95+
_metrics.CachedItemsSummary.Observe(_cache.Count);
96+
}
8297
}
8398
}

src/KubeOps/Operator/Comparing/ReferenceEqualityComparer.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ namespace KubeOps.Operator.Comparing
44
{
55
internal class ReferenceEqualityComparer : EqualityComparer<object>
66
{
7-
public override bool Equals(object x, object y) => ReferenceEquals(x, y);
7+
public override bool Equals(object? x, object? y) => ReferenceEquals(x, y);
88

9-
public override int GetHashCode(object obj) => obj == null ? 0 : obj.GetHashCode();
9+
public override int GetHashCode(object? obj) => obj == null ? 0 : obj.GetHashCode();
1010
}
1111
}

src/KubeOps/Operator/Controller/IResourceController.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@
44

55
namespace KubeOps.Operator.Controller
66
{
7-
public interface IResourceController<TEntity> : IHostedService
7+
public interface IResourceController : IHostedService
8+
{
9+
internal bool Running { get; }
10+
}
11+
12+
public interface IResourceController<TEntity> : IResourceController
813
where TEntity : IKubernetesObject<V1ObjectMeta>
914
{
1015
}

src/KubeOps/Operator/Controller/ResourceControllerBase.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ public abstract class ResourceControllerBase<TEntity> : IResourceController<TEnt
2929

3030
private readonly ILogger<ResourceControllerBase<TEntity>> _logger;
3131
private readonly IResourceEventQueue<TEntity> _eventQueue;
32+
private bool _running;
3233

3334
protected ResourceControllerBase()
3435
: this(
@@ -48,6 +49,8 @@ protected ResourceControllerBase(
4849
Client = client;
4950
}
5051

52+
bool IResourceController.Running => _running;
53+
5154
protected IKubernetesClient Client { get; }
5255

5356
public async Task StartAsync(CancellationToken cancellationToken)
@@ -56,6 +59,7 @@ public async Task StartAsync(CancellationToken cancellationToken)
5659

5760
_eventQueue.ResourceEvent += OnResourceEvent;
5861
await _eventQueue.Start();
62+
_running = true;
5963
}
6064

6165
public Task StopAsync(CancellationToken cancellationToken)
@@ -64,7 +68,7 @@ public Task StopAsync(CancellationToken cancellationToken)
6468

6569
_eventQueue.Stop();
6670
_eventQueue.ResourceEvent -= OnResourceEvent;
67-
71+
_running = false;
6872
return Task.CompletedTask;
6973
}
7074

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
using System.Collections.Generic;
2+
using System.Linq;
3+
using System.Threading;
4+
using System.Threading.Tasks;
5+
using KubeOps.Operator.Controller;
6+
using Microsoft.Extensions.Diagnostics.HealthChecks;
7+
using Microsoft.Extensions.Hosting;
8+
9+
namespace KubeOps.Operator.DevOps
10+
{
11+
/// <summary>
/// Health check that reports healthy only while every registered
/// <see cref="IResourceController"/> reports that it is running.
/// </summary>
internal class ControllerLivenessCheck : IHealthCheck
{
    private readonly IList<IResourceController> _controller;

    /// <summary>
    /// Creates the check from the registered hosted services.
    /// </summary>
    /// <param name="services">
    /// All hosted services; only those implementing
    /// <see cref="IResourceController"/> are kept and inspected.
    /// </param>
    public ControllerLivenessCheck(IEnumerable<IHostedService> services)
    {
        // OfType already filters by type, so no preceding Where is required.
        _controller = services
            .OfType<IResourceController>()
            .ToList();
    }

    /// <summary>
    /// Evaluates the running state of all known controllers.
    /// </summary>
    /// <param name="context">Health check context (not used by this check).</param>
    /// <param name="cancellationToken">Cancellation token (not used; the check completes synchronously).</param>
    /// <returns>
    /// A healthy result when all controllers are running; otherwise an unhealthy
    /// result whose data maps each controller type name to its running state.
    /// </returns>
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        // Read every Running flag exactly once so that the verdict and the
        // reported data cannot disagree when a controller changes state
        // while the check is executing.
        var states = _controller
            .Select(c => (Name: c.GetType().Name, c.Running))
            .ToList();

        if (states.All(s => s.Running))
        {
            return Task.FromResult(HealthCheckResult.Healthy("all controllers are running."));
        }

        // Build the report manually: ToDictionary throws on duplicate keys,
        // which would turn an "unhealthy" answer into a failing health check
        // when two controllers share the same simple type name.
        var data = new Dictionary<string, object>();
        foreach (var (name, running) in states)
        {
            var key = name;
            for (var suffix = 2; data.ContainsKey(key); suffix++)
            {
                key = $"{name}#{suffix}";
            }

            data[key] = $"running: {running}";
        }

        return Task.FromResult(
            HealthCheckResult.Unhealthy(
                "some controllers are not running.",
                data: data));
    }
}
40+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
using k8s;
2+
using k8s.Models;
3+
using KubeOps.Operator.DependencyInjection;
4+
using KubeOps.Operator.Entities.Extensions;
5+
using Microsoft.Extensions.DependencyInjection;
6+
using Prometheus;
7+
8+
namespace KubeOps.Operator.DevOps
9+
{
10+
/// <summary>
/// Prometheus metrics for the resource cache of <typeparamref name="TEntity"/>.
/// Both metrics are labelled with the operator name and the kind, group,
/// version and scope of the entity's resource definition.
/// </summary>
/// <typeparam name="TEntity">The Kubernetes entity type held by the cache.</typeparam>
internal class ResourceCacheMetrics<TEntity>
    where TEntity : IKubernetesObject<V1ObjectMeta>
{
    // Label names; the order must match the label values built in the constructor.
    private static readonly string[] Labels = { "operator", "kind", "group", "version", "scope" };

    /// <summary>
    /// Resolves the operator settings and the resource definition of
    /// <typeparamref name="TEntity"/>, then creates the labelled summary and gauge.
    /// </summary>
    public ResourceCacheMetrics()
    {
        var operatorSettings = DependencyInjector.Services.GetRequiredService<OperatorSettings>();
        var definition = CustomEntityDefinitionExtensions.CreateResourceDefinition<TEntity>();

        var values = new[]
        {
            operatorSettings.Name,
            definition.Kind,
            definition.Group,
            definition.Version,
            definition.Scope.ToString(),
        };

        // NOTE(review): the help text mentions a 10 minute window, but no
        // explicit summary configuration is passed here - confirm that this
        // matches the library defaults.
        var summaryFamily = Metrics.CreateSummary(
            "operator_resource_cached_items_size",
            "Summary of the cached items count over the last 10 minutes",
            Labels);
        CachedItemsSummary = summaryFamily.WithLabels(values);

        var gaugeFamily = Metrics.CreateGauge(
            "operator_resource_cached_items_count",
            "Total number of cached items in this resource cache",
            Labels);
        CachedItemsSize = gaugeFamily.WithLabels(values);
    }

    /// <summary>Labelled summary child created in the constructor.</summary>
    public Summary.Child CachedItemsSummary { get; }

    /// <summary>Labelled gauge child created in the constructor.</summary>
    public Gauge.Child CachedItemsSize { get; }
}
54+
}

0 commit comments

Comments
 (0)