Skip to content

Commit 05c94bd

Browse files
authored
Fix Gossip composite consensus test timing (#2367)
* Fix Gossip composite consensus test timing * Extract topology consensus wait helper * refactor: dispose fixtures directly * test: cover ExpectUpdatedTopologyConsensus
1 parent 911ee75 commit 05c94bd

File tree

8 files changed

+110
-14
lines changed

8 files changed

+110
-14
lines changed

logs/log1755980804.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
## Fix flakiness in CompositeConsensusWorks
2+
- Removed skip from the test to re-enable it.
3+
- Eliminated arbitrary delays and used topology consensus to wait for cluster stability.
4+
- Added polling loop after spawning a member to ensure topology hash updates before asserting.
5+
- Confirmed reliability by running targeted test multiple times and entire test suites.

logs/log1756009491.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
- Added ExpectUpdatedTopologyConsensus helper to Proto.TestKit to wait for topology changes without manual polling.
2+
- Updated composite gossip consensus test to use new helper instead of inline loop, improving readability and reducing timing flakiness.
3+
- Exposed Proto.Cluster internals to Proto.TestKit for access to topology consensus check.

logs/log1756009684.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
## Summary
2+
- Simplified gossip tests by directly disposing fixtures with `await using` instead of unused `cleanup` variables
3+
4+
## Motivation
5+
- Unused `cleanup` variables were redundant and flagged in review; using `await using` on the fixture itself clarifies intent and removes dead code

logs/log1756010588.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Change Log
2+
- Added dedicated tests for `ExpectUpdatedTopologyConsensus` to verify completion on topology changes and timeout behavior
3+
- Removed redundant topology hash assertion from `GossipTests.CompositeConsensusWorks` to rely solely on the helper

src/Proto.Cluster/Properties/AssemblyInfo.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@
77
using System.Runtime.CompilerServices;
88

99
[assembly: InternalsVisibleTo("Proto.Cluster.Tests")]
10-
[assembly: InternalsVisibleTo("Proto.Premium")]
10+
[assembly: InternalsVisibleTo("Proto.Premium")]
11+
[assembly: InternalsVisibleTo("Proto.TestKit")]
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
using System;
2+
using System.Threading.Tasks;
3+
using Proto;
4+
using Proto.Cluster;
5+
6+
namespace Proto.TestKit;
7+
8+
/// <summary>
9+
/// Extension methods for cluster-related test helpers.
10+
/// </summary>
11+
public static class ClusterTestKitExtensions
12+
{
13+
/// <summary>
14+
/// Waits until the cluster's topology consensus hash changes.
15+
/// </summary>
16+
/// <param name="cluster">Cluster to monitor.</param>
17+
/// <param name="timeout">Maximum time to wait for the update. Defaults to 20 seconds.</param>
18+
/// <returns>The new topology hash once it differs from the initial hash.</returns>
19+
public static async Task<ulong> ExpectUpdatedTopologyConsensus(this Proto.Cluster.Cluster cluster, TimeSpan? timeout = null)
20+
{
21+
var waitTimeout = timeout ?? TimeSpan.FromSeconds(20);
22+
// Allow each consensus check up to five seconds to complete
23+
var ct = CancellationTokens.FromSeconds(5);
24+
25+
// Capture the initial topology hash
26+
(_, var initialTopologyHash) = await cluster.MemberList.TopologyConsensus(ct).ConfigureAwait(false);
27+
ulong updatedTopologyHash = initialTopologyHash;
28+
29+
await TestKit.AwaitConditionAsync(async () =>
30+
{
31+
(_, updatedTopologyHash) = await cluster.MemberList.TopologyConsensus(ct).ConfigureAwait(false);
32+
return updatedTopologyHash != initialTopologyHash;
33+
}, waitTimeout, $"Topology hash did not change within {waitTimeout}");
34+
35+
return updatedTopologyHash;
36+
}
37+
}
38+
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
using System;
2+
using System.Linq;
3+
using System.Threading.Tasks;
4+
using FluentAssertions;
5+
using Proto.TestKit;
6+
using Proto.Utils;
7+
using Xunit;
8+
9+
namespace Proto.Cluster.Tests;
10+
11+
public class ExpectUpdatedTopologyConsensusTests
12+
{
13+
[Fact]
14+
public async Task Completes_when_topology_changes()
15+
{
16+
await using var clusterFixture = new InMemoryClusterFixture();
17+
await clusterFixture.InitializeAsync();
18+
19+
// Capture the initial topology hash
20+
(_, var initialHash) = await clusterFixture.Members.First()
21+
.MemberList.TopologyConsensus(CancellationTokens.FromSeconds(5));
22+
23+
var updateTask = clusterFixture.Members.First().ExpectUpdatedTopologyConsensus();
24+
25+
await clusterFixture.SpawnMember();
26+
27+
var newHash = await updateTask;
28+
29+
newHash.Should().NotBe(initialHash);
30+
}
31+
32+
[Fact]
33+
public async Task Throws_when_topology_does_not_change()
34+
{
35+
await using var clusterFixture = new InMemoryClusterFixture();
36+
await clusterFixture.InitializeAsync();
37+
38+
var updateTask = clusterFixture.Members.First()
39+
.ExpectUpdatedTopologyConsensus(TimeSpan.FromMilliseconds(200));
40+
41+
await Assert.ThrowsAsync<TimeoutException>(() => updateTask);
42+
}
43+
}

tests/Proto.Cluster.Tests/GossipTests.cs

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
using ClusterTest.Messages;
1313
using FluentAssertions;
1414
using Proto.Cluster.Gossip;
15+
using Proto.TestKit;
1516
using Xunit;
1617
using Xunit.Abstractions;
1718

@@ -34,8 +35,7 @@ public GossipTests(ITestOutputHelper testOutputHelper)
3435
[Fact]
3536
public async Task CanGetConsensus()
3637
{
37-
var clusterFixture = new InMemoryClusterFixture();
38-
await using var _ = clusterFixture;
38+
await using var clusterFixture = new InMemoryClusterFixture();
3939
await clusterFixture.InitializeAsync();
4040

4141
const string initialValue = "hello consensus";
@@ -50,17 +50,14 @@ public async Task CanGetConsensus()
5050

5151
}
5252

53-
[Fact(Skip = "Flaky")]
53+
[Fact]
5454
public async Task CompositeConsensusWorks()
5555
{
5656
var timeout = CancellationTokens.FromSeconds(20);
57-
var clusterFixture = new InMemoryClusterFixture();
58-
await using var _ = clusterFixture;
57+
await using var clusterFixture = new InMemoryClusterFixture();
5958
await clusterFixture.InitializeAsync();
6059

61-
// Allow cluster to settle before verifying consensus
62-
await Task.Delay(1000);
63-
60+
// Wait for the cluster to reach topology consensus before performing checks
6461
var (consensus, initialTopologyHash) =
6562
await clusterFixture.Members.First().MemberList.TopologyConsensus(timeout);
6663

@@ -83,8 +80,11 @@ public async Task CompositeConsensusWorks()
8380

8481
afterSettingMatchingState.value.Should().Be(initialTopologyHash);
8582

83+
var updatedTopology = clusterFixture.Members.First().ExpectUpdatedTopologyConsensus();
84+
8685
await clusterFixture.SpawnMember();
87-
await Task.Delay(2000); // Allow topology state to propagate
86+
87+
await updatedTopology;
8888

8989
var afterChangingTopology =
9090
await firstNodeCheck.TryGetConsensus(TimeSpan.FromMilliseconds(500), timeout);
@@ -96,8 +96,7 @@ public async Task CompositeConsensusWorks()
9696
[Fact]
9797
public async Task CanFallOutOfConsensus()
9898
{
99-
var clusterFixture = new InMemoryClusterFixture();
100-
await using var _ = clusterFixture;
99+
await using var clusterFixture = new InMemoryClusterFixture();
101100
await clusterFixture.InitializeAsync();
102101

103102
const string initialValue = "hello consensus";
@@ -146,8 +145,7 @@ public async Task Gossip_should_replicate_large_state_with_small_batches()
146145
const int maxSend = 2;
147146
const int keysPerMember = 5;
148147

149-
var clusterFixture = new GossipClusterFixture(memberCount, fanout, maxSend);
150-
await using var _ = clusterFixture;
148+
await using var clusterFixture = new GossipClusterFixture(memberCount, fanout, maxSend);
151149
await clusterFixture.InitializeAsync();
152150

153151
var expected = clusterFixture.Members.ToDictionary(

0 commit comments

Comments
 (0)