Skip to content

Commit 7220dd3

Browse files
committed
ensure socket is disposed successfully
1 parent 7749925 commit 7220dd3

File tree

1 file changed

+48
-22
lines changed

1 file changed

+48
-22
lines changed

test/Garnet.test.cluster/ClusterTestContext.cs

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
using Garnet.server.Auth.Settings;
1515
using Microsoft.Extensions.Logging;
1616
using NUnit.Framework;
17+
using NUnit.Framework.Interfaces;
1718
using NUnit.Framework.Legacy;
1819
using StackExchange.Redis;
1920
using Tsavorite.core;
@@ -117,42 +118,73 @@ public void RestartNode(int nodeIndex)
117118
nodes[nodeIndex].Start();
118119
}
119120

121+
120122
public void TearDown()
121123
{
124+
// Capture test outcome before any teardown work to distinguish
125+
// primary teardown failures from secondary ones caused by a hung/failed test.
126+
var testOutcome = TestContext.CurrentContext.Result.Outcome;
127+
var testAlreadyFailed = testOutcome.Status == TestStatus.Failed;
128+
129+
if (testAlreadyFailed)
130+
{
131+
logger?.LogError(
132+
"TearDown: test already failed ({label}): {message}",
133+
testOutcome.Label,
134+
TestContext.CurrentContext.Result.Message);
135+
}
136+
122137
cts.Cancel();
123138
cts.Dispose();
124-
logger.LogDebug("0. Dispose <<<<<<<<<<<");
125139
waiter?.Dispose();
126140
clusterTestUtils?.Dispose();
127-
var timeoutSeconds = 5;
128141

129-
var failMessage = "";
142+
var timeoutSeconds = 60;
143+
string failureReason = null;
130144

131-
if (!Task.Run(() => DisposeCluster()).Wait(TimeSpan.FromSeconds(timeoutSeconds)))
145+
// Phase 1: Dispose cluster nodes (may timeout if handlers are stuck)
146+
try
132147
{
133-
logger?.LogError("Timed out waiting for DisposeCluster");
134-
failMessage += "Timed out waiting for DisposeCluster; ";
148+
if (!Task.Run(() => DisposeCluster()).Wait(TimeSpan.FromSeconds(timeoutSeconds)))
149+
{
150+
failureReason = "Timed out waiting for DisposeCluster";
151+
logger?.LogError("Timed out waiting for DisposeCluster");
152+
}
135153
}
136-
// Dispose logger factory only after servers are disposed
137-
loggerFactory?.Dispose();
138-
if (!Task.Run(() => TestUtils.DeleteDirectory(TestFolder, true)).Wait(TimeSpan.FromSeconds(timeoutSeconds)))
154+
catch (Exception ex)
139155
{
140-
logger?.LogError("Timed out DeleteDirectory");
141-
failMessage += "Timed out DeleteDirectory; ";
156+
failureReason = $"DisposeCluster threw: {ex.Message}";
157+
logger?.LogError(ex, "DisposeCluster failed");
142158
}
143159

160+
// Phase 2: Dispose logger factory (always, even after timeout)
161+
loggerFactory?.Dispose();
162+
163+
// Phase 3: Delete test directory (may timeout if files locked from Phase 1 timeout)
144164
try
145165
{
146-
TestUtils.OnTearDown();
166+
if (!Task.Run(() => TestUtils.DeleteDirectory(TestFolder, true)).Wait(TimeSpan.FromSeconds(timeoutSeconds)))
167+
{
168+
failureReason ??= "Timed out DeleteDirectory";
169+
logger?.LogError("Timed out DeleteDirectory");
170+
}
147171
}
148-
catch (AssertionException e)
172+
catch (Exception ex)
149173
{
150-
failMessage += e.Message;
174+
failureReason ??= $"DeleteDirectory threw: {ex.Message}";
175+
logger?.LogError(ex, "DeleteDirectory failed");
151176
}
152177

153-
if (failMessage != "")
178+
// Phase 4: Always runs — resets LightEpoch instances to prevent cross-test contamination
179+
TestUtils.OnTearDown();
180+
181+
// Fail the test at the end, after all cleanup is done
182+
if (failureReason != null)
154183
{
155-
ClassicAssert.Fail(failMessage);
184+
var context = testAlreadyFailed
185+
? $" (secondary failure — test already failed with '{testOutcome.Label}')"
186+
: " (primary failure — test itself passed)";
187+
Assert.Fail(failureReason + context);
156188
}
157189
}
158190

@@ -726,20 +758,14 @@ public void ClusterFailoverSpinWait(int replicaNodeIndex, ILogger logger)
726758
public void AttachAndWaitForSync(int primary_count, int replica_count, bool disableObjects)
727759
{
728760
var primaryId = clusterTestUtils.GetNodeIdFromNode(0, logger);
729-
// Issue meet to replicas
730-
for (var i = primary_count; i < primary_count + replica_count; i++)
731-
clusterTestUtils.Meet(i, 0);
732761

733762
// Wait until primary node is known so as not to fail replicate
734763
for (var i = primary_count; i < primary_count + replica_count; i++)
735764
clusterTestUtils.WaitUntilNodeIdIsKnown(i, primaryId, logger: logger);
736765

737766
// Issue cluster replicate and bump epoch manually to capture config.
738767
for (var i = primary_count; i < primary_count + replica_count; i++)
739-
{
740768
_ = clusterTestUtils.ClusterReplicate(i, primaryId, async: true, logger: logger);
741-
clusterTestUtils.BumpEpoch(i, logger: logger);
742-
}
743769

744770
if (!checkpointTask.Wait(TimeSpan.FromSeconds(100))) Assert.Fail("Checkpoint task timeout");
745771

0 commit comments

Comments
 (0)