Skip to content

Commit 95273db

Browse files
ReubenBondCopilot
andcommitted
Address PR review comments for membership startup cleanup
- Use non-generic TaskCompletionSource instead of TaskCompletionSource<bool> since the result value is never used (pentp) - Fix misleading comment on myEtag null check to match actual logic - Use WaitToWriteAsync+TryWrite pattern in CleanupMyTableEntries to avoid relying on DropOldest semantics for acknowledged requests, and add cancellation via WaitAsync(_shutdownCts.Token) to prevent hanging - Always retry failed requests regardless of whether Completion is set; Completion is only signaled on success, not on individual failures Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 20505a5 commit 95273db

File tree

1 file changed

+19
-20
lines changed

1 file changed

+19
-20
lines changed

src/Orleans.Runtime/MembershipService/MembershipTableManager.cs

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ private async Task<bool> TryUpdateMyStatusGlobalOnce(SiloStatus newStatus)
394394

395395
bool ok;
396396
TableVersion next = table.Version.Next();
397-
if (myEtag is not null) // no previous etag for my entry -> its the first write to this entry, so insert instead of update.
397+
if (myEtag is not null) // existing etag for my entry -> there is a previous row, so update instead of insert.
398398
{
399399
ok = await membershipTableProvider.UpdateRow(myEntry, myEtag, next);
400400
}
@@ -526,15 +526,24 @@ private async Task<bool> CleanupMyTableEntries(MembershipTableData table)
526526

527527
LogDebugCleanupTableEntriesAboutToDeclareDead(this.log, silosToDeclareDead.Count, Utils.EnumerableToString(silosToDeclareDead.Select(tuple => tuple.Item1)));
528528

529-
var completions = new List<Task<bool>>(silosToDeclareDead.Count);
529+
var completions = new List<Task>(silosToDeclareDead.Count);
530530
foreach (var siloData in silosToDeclareDead)
531531
{
532532
var (request, completion) = SuspectOrKillRequest.CreateAcknowledgedKillRequest(siloData.Item1.SiloAddress);
533-
await _trySuspectOrKillChannel.Writer.WriteAsync(request);
534-
completions.Add(completion);
533+
await _trySuspectOrKillChannel.Writer.WaitToWriteAsync(_shutdownCts.Token);
534+
if (_trySuspectOrKillChannel.Writer.TryWrite(request))
535+
{
536+
completions.Add(completion);
537+
}
535538
}
536539

537-
await Task.WhenAll(completions);
540+
try
541+
{
542+
await Task.WhenAll(completions).WaitAsync(_shutdownCts.Token);
543+
}
544+
catch (OperationCanceledException) when (_shutdownCts.IsCancellationRequested)
545+
{
546+
}
538547
return true;
539548
}
540549

@@ -578,13 +587,10 @@ bool IsFunctionalForMembership(SiloStatus status)
578587

579588
private class SuspectOrKillRequest
580589
{
581-
private const int MaxAttempts = 3;
582-
583590
public required SiloAddress SiloAddress { get; init; }
584591
public SiloAddress? OtherSilo { get; init; }
585592
public required RequestType Type { get; init; }
586-
public TaskCompletionSource<bool>? Completion { get; init; }
587-
public int RemainingAttempts { get; set; } = MaxAttempts;
593+
public TaskCompletionSource? Completion { get; init; }
588594

589595
public enum RequestType
590596
{
@@ -602,9 +608,9 @@ public static SuspectOrKillRequest CreateKillRequest(SiloAddress silo)
602608
};
603609
}
604610

605-
public static (SuspectOrKillRequest Request, Task<bool> Completion) CreateAcknowledgedKillRequest(SiloAddress silo)
611+
public static (SuspectOrKillRequest Request, Task Completion) CreateAcknowledgedKillRequest(SiloAddress silo)
606612
{
607-
var completion = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
613+
var completion = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
608614
var request = new SuspectOrKillRequest
609615
{
610616
SiloAddress = silo,
@@ -655,20 +661,13 @@ public async Task ProcessSuspectOrKillLists()
655661
break;
656662
}
657663
runningFailureCount = 0;
658-
request.Completion?.TrySetResult(true);
664+
request.Completion?.TrySetResult();
659665
}
660666
catch (Exception ex)
661667
{
662668
runningFailureCount += 1;
663669
LogErrorProcessingSuspectOrKillLists(this.log, ex, runningFailureCount);
664-
if (request.Completion is not null)
665-
{
666-
request.Completion.TrySetException(ex);
667-
}
668-
else if (--request.RemainingAttempts > 0)
669-
{
670-
await _trySuspectOrKillChannel.Writer.WriteAsync(request, _shutdownCts.Token);
671-
}
670+
await _trySuspectOrKillChannel.Writer.WriteAsync(request, _shutdownCts.Token);
672671
}
673672

674673
if (!reader.TryPeek(out _))

0 commit comments

Comments
 (0)