Skip to content

Commit 62db9ad

Browse files
author
Christoph Bühler
committed
feat(operator): add better error handling and reconnection logic
1 parent 003950a commit 62db9ad

File tree

8 files changed

+27
-131
lines changed

8 files changed

+27
-131
lines changed

_old/src/KubeOps/Operator/Errors/BackoffStrategies.cs

Lines changed: 0 additions & 28 deletions
This file was deleted.

_old/src/KubeOps/Operator/Errors/CrdConversionException.cs

Lines changed: 0 additions & 18 deletions
This file was deleted.

_old/src/KubeOps/Operator/Errors/CrdPropertyTypeException.cs

Lines changed: 0 additions & 19 deletions
This file was deleted.

examples/Operator/Controller/V1SecondEntityController.cs

Lines changed: 0 additions & 31 deletions
This file was deleted.

examples/Operator/Entities/V1SecondEntity.cs

Lines changed: 0 additions & 11 deletions
This file was deleted.

examples/Operator/Finalizer/FinalizerTwo.cs

Lines changed: 0 additions & 13 deletions
This file was deleted.

examples/Operator/todos.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
todo:
2-
- error handling
32
- web: webhooks
43
- docs

src/KubeOps.Operator/Watcher/ResourceWatcher{TEntity}.cs

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ internal class ResourceWatcher<TEntity> : IHostedService
2929
private readonly ConcurrentDictionary<string, long> _entityCache = new();
3030
private readonly Lazy<List<FinalizerRegistration>> _finalizers;
3131
private bool _stopped;
32+
private uint _watcherReconnectRetries;
3233

3334
private Watcher<TEntity>? _watcher;
3435

@@ -81,21 +82,14 @@ private void WatchResource()
8182
}
8283
}
8384

85+
_logger.LogDebug("""Create watcher for entity of type "{type}".""", typeof(TEntity));
8486
_watcher = _client.Watch(OnEvent, OnError, OnClosed, @namespace: _settings.Namespace);
8587
}
8688

8789
private void StopWatching()
8890
{
8991
_watcher?.Dispose();
90-
}
91-
92-
private void OnClosed()
93-
{
94-
_logger.LogDebug("The server closed the connection.");
95-
if (!_stopped)
96-
{
97-
WatchResource();
98-
}
92+
_watcher = null;
9993
}
10094

10195
private async void OnEntityRequeue(object? sender, (string Name, string? Namespace) queued)
@@ -116,7 +110,7 @@ private async void OnEntityRequeue(object? sender, (string Name, string? Namespa
116110
await ReconcileModification(entity);
117111
}
118112

119-
private void OnError(Exception e)
113+
private async void OnError(Exception e)
120114
{
121115
switch (e)
122116
{
@@ -138,11 +132,34 @@ e.InnerException is EndOfStreamException &&
138132
}
139133

140134
_logger.LogError(e, """There was an error while watching the resource "{resource}".""", typeof(TEntity));
135+
StopWatching();
136+
_watcherReconnectRetries++;
137+
138+
var delay = TimeSpan
139+
.FromSeconds(Math.Pow(2, Math.Clamp(_watcherReconnectRetries, 0, 5)))
140+
.Add(TimeSpan.FromMilliseconds(new Random().Next(0, 1000)));
141+
_logger.LogWarning(
142+
"There were {retries} errors / retries in the watcher. Wait {seconds}s before next attempt to connect.",
143+
_watcherReconnectRetries,
144+
delay.TotalSeconds);
145+
await Task.Delay(delay);
146+
141147
WatchResource();
142148
}
143149

150+
/// <summary>
/// Callback passed to <c>_client.Watch</c> as the "closed" handler.
/// Logs that the watcher was closed and, if allowed, creates a new watcher
/// via <see cref="WatchResource"/>.
/// </summary>
private void OnClosed()
151+
{
152+
_logger.LogDebug("The watcher was closed.");
153+
// Re-create the watcher only when the hosted service has not been stopped
// and no error retries are pending. OnError increments
// _watcherReconnectRetries and calls WatchResource() itself after a delay;
// OnEvent resets the counter to 0.
// NOTE(review): presumably the retry check prevents a second watcher from
// being started when a close follows an error - confirm against the
// ordering of StopWatching() and the counter increment in OnError.
if (!_stopped && _watcherReconnectRetries == 0)
154+
{
155+
WatchResource();
156+
}
157+
}
158+
144159
private async void OnEvent(WatchEventType type, TEntity entity)
145160
{
161+
_watcherReconnectRetries = 0;
162+
146163
_logger.LogTrace(
147164
"""Received watch event "{eventType}" for "{kind}/{name}".""",
148165
type,

0 commit comments

Comments
 (0)