Skip to content

Commit 0965d23

Browse files
authored
fix(leader-election): introduce delay before attempting to reacquire leadership (#794)
Fixes #785. Introduce a retry delay before attempting to reacquire leadership, fixing the log noise generated from the API server being unavailable.
1 parent d026e3b commit 0965d23

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

src/KubeOps.Operator/LeaderElection/LeaderElectionBackgroundService.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ static async ValueTask CastAndDispose(IDisposable resource)
8282

8383
private async Task RunAndTryToHoldLeadershipForeverAsync()
8484
{
85+
uint leadershipRetries = 0;
86+
8587
while (!_cts.IsCancellationRequested)
8688
{
8789
try
@@ -94,7 +96,14 @@ private async Task RunAndTryToHoldLeadershipForeverAsync()
9496
}
9597
catch (Exception exception)
9698
{
97-
logger.LogError(exception, "Failed to hold leadership.");
99+
leadershipRetries++;
100+
101+
var delay = TimeSpan
102+
.FromSeconds(Math.Pow(2, Math.Clamp(leadershipRetries, 0, 5)))
103+
.Add(TimeSpan.FromMilliseconds(new Random().Next(0, 1000)));
104+
105+
logger.LogError(exception, "Failed to hold leadership. Wait {Seconds}s before attempting to reacquire leadership.", delay.TotalSeconds);
106+
await Task.Delay(delay);
98107
}
99108
}
100109
}

test/KubeOps.Operator.Test/LeaderElector/LeaderElectionBackgroundService.Test.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ public async Task Elector_Throws_Should_Retry()
5050
await leaderElectionBackgroundService.StartAsync(CancellationToken.None);
5151

5252
// Starting the background service should result in the lock attempt throwing, and then a subsequent attempt being made.
53-
// Wait for the subsequent event to be signalled, if we time out the test fails.
54-
electionLockSubsequentCallEvent.WaitOne(TimeSpan.FromMilliseconds(500)).Should().BeTrue();
53+
// Wait for the subsequent event to be signalled; if we time out, the test fails. The first retry is delayed by 2 seconds plus up to 1 second of random jitter, so we allow just over 3 seconds.
54+
electionLockSubsequentCallEvent.WaitOne(TimeSpan.FromMilliseconds(3100)).Should().BeTrue();
5555

5656
await leaderElectionBackgroundService.StopAsync(CancellationToken.None);
5757
}

0 commit comments

Comments
 (0)