Skip to content

Commit 34c8f17

Browse files
author
Christoph Bühler
committed
feat: add exponential backoff to restart watcher
1 parent 88cd7c4 commit 34c8f17

File tree

7 files changed

+62
-10
lines changed

7 files changed

+62
-10
lines changed

src/KubeOps/KubeOps.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
<ItemGroup>
2727
<PackageReference Include="CompareNETObjects" Version="4.66.0" />
28-
<PackageReference Include="KubernetesClient" Version="2.0.18" />
28+
<PackageReference Include="KubernetesClient" Version="2.0.21" />
2929
<PackageReference Include="McMaster.Extensions.CommandLineUtils" Version="3.0.0" />
3030
<PackageReference Include="McMaster.Extensions.Hosting.CommandLine" Version="3.0.0" />
3131
<PackageReference Include="YamlDotNet" Version="8.1.1" />

src/KubeOps/Operator/Watcher/EntityWatcher.cs

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,25 @@
77
using KubeOps.Operator.DependencyInjection;
88
using Microsoft.Extensions.DependencyInjection;
99
using Microsoft.Extensions.Logging;
10+
using Timer = System.Timers.Timer;
1011

1112
namespace KubeOps.Operator.Watcher
1213
{
1314
internal class EntityWatcher<TEntity> : IDisposable
1415
where TEntity : IKubernetesObject<V1ObjectMeta>
1516
{
17+
private const double MaxRetrySeconds = 64;
18+
19+
private int _errorCount = 0;
20+
1621
private readonly ILogger<EntityWatcher<TEntity>> _logger;
22+
private readonly Random _rnd = new Random();
1723
private CancellationTokenSource? _cancellation;
1824
private Watcher<TEntity>? _watcher;
1925

26+
private Timer? _reconnectTimer;
27+
private Timer? _resetErrCountTimer;
28+
2029
private readonly Lazy<IKubernetesClient> _client =
2130
new Lazy<IKubernetesClient>(() => DependencyInjector.Services.GetRequiredService<IKubernetesClient>());
2231

@@ -51,6 +60,8 @@ public void Dispose()
5160
WatcherEvent -= (EventHandler<(WatchEventType type, TEntity resource)>) handler;
5261
}
5362

63+
_reconnectTimer?.Dispose();
64+
_resetErrCountTimer?.Dispose();
5465
_cancellation?.Dispose();
5566
_watcher?.Dispose();
5667
_logger.LogTrace(@"Disposed resource watcher for type ""{type}"".", typeof(TEntity));
@@ -71,6 +82,19 @@ private async Task WatchResource()
7182
}
7283
}
7384

85+
_resetErrCountTimer = new Timer(TimeSpan.FromSeconds(10).TotalMilliseconds);
86+
_resetErrCountTimer.Elapsed += (_, __) =>
87+
{
88+
_logger.LogTrace("Reset error count in resource watcher.");
89+
_errorCount = 0;
90+
_resetErrCountTimer.Dispose();
91+
_resetErrCountTimer = null;
92+
_reconnectTimer?.Stop();
93+
_reconnectTimer?.Dispose();
94+
_reconnectTimer = null;
95+
};
96+
_resetErrCountTimer.Start();
97+
7498
_cancellation = new CancellationTokenSource();
7599
// TODO: namespaced resources
76100
_watcher = await _client.Value.Watch<TEntity>(
@@ -117,11 +141,19 @@ private void OnWatcherEvent(WatchEventType type, TEntity resource)
117141
private void OnException(Exception e)
118142
{
119143
_logger.LogError(e, @"There was an error while watching the resource ""{resource}"".", typeof(TEntity));
120-
// _logger.LogInformation("Trying to reconnect.");
121-
// RestartWatcher();
122144
_cancellation?.Cancel();
123145
_watcher?.Dispose();
124146
_watcher = null;
147+
148+
_logger.LogInformation("Trying to reconnect with exponential backoff.");
149+
_resetErrCountTimer?.Stop();
150+
_resetErrCountTimer?.Dispose();
151+
_resetErrCountTimer = null;
152+
_reconnectTimer?.Stop();
153+
_reconnectTimer?.Dispose();
154+
_reconnectTimer = new Timer(ExponentialBackoff(++_errorCount).TotalMilliseconds);
155+
_reconnectTimer.Elapsed += (_, __) => RestartWatcher();
156+
_reconnectTimer.Start();
125157
}
126158

127159
private void OnClose()
@@ -132,5 +164,9 @@ private void OnClose()
132164
RestartWatcher();
133165
}
134166
}
167+
168+
private TimeSpan ExponentialBackoff(int retryCount) => TimeSpan
169+
.FromSeconds(Math.Min(Math.Pow(2, retryCount), MaxRetrySeconds))
170+
.Add(TimeSpan.FromMilliseconds(_rnd.Next(0, 1000)));
135171
}
136172
}

tests/KubeOps.Test/KubeOps.Test.csproj

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
</PropertyGroup>
88

99
<ItemGroup>
10-
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.5.0"/>
11-
<PackageReference Include="xunit" Version="2.4.0"/>
12-
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.0"/>
13-
<PackageReference Include="coverlet.collector" Version="1.2.0"/>
10+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.6.1" />
11+
<PackageReference Include="xunit" Version="2.4.1" />
12+
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.1" />
13+
<PackageReference Include="coverlet.collector" Version="1.2.1" />
1414
</ItemGroup>
1515

1616
</Project>

tests/KubeOps.TestOperator/Entities/TestEntity.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ public class TestEntityStatus
1313
public string Status { get; set; } = string.Empty;
1414
}
1515

16-
[KubernetesEntity(Group = "testing", ApiVersion = "v1")]
16+
[KubernetesEntity(Group = "testing.dev", ApiVersion = "v1", PluralName = "testentities")]
1717
public class TestEntity : CustomKubernetesEntity<TestEntitySpec, TestEntityStatus>
1818
{
1919
}

tests/KubeOps.TestOperator/KubeOps.TestOperator.csproj

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,10 @@
99
<ProjectReference Include="..\..\src\KubeOps\KubeOps.csproj" />
1010
</ItemGroup>
1111

12+
<ItemGroup>
13+
<Content Include="appsettings.json">
14+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
15+
</Content>
16+
</ItemGroup>
17+
1218
</Project>

tests/KubeOps.TestOperator/Program.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
using KubeOps.Operator;
1+
using System.Threading.Tasks;
2+
using KubeOps.Operator;
23
using KubeOps.TestOperator.Controller;
34
using KubeOps.TestOperator.Entities;
45

56
namespace KubeOps.TestOperator
67
{
78
public static class Program
89
{
9-
public static void Main(string[] args) => new KubernetesOperator()
10+
public static Task<int> Main(string[] args) => new KubernetesOperator()
1011
.ConfigureServices(services => { services.AddResourceController<TestController, TestEntity>(); })
1112
.Run(args);
1213
}

tests/KubeOps.TestOperator/appsettings.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"Logging": {
3+
"LogLevel": {
4+
"Default": "Debug",
5+
"System": "Information",
6+
"Microsoft": "Information"
7+
}
8+
}
9+
}

0 commit comments

Comments
 (0)