Skip to content

Commit a2a9900

Browse files
craig[bot]pav-kv
andcommitted
Merge #155239
155239: kvserver: fixups around replica GC queue r=stevendanna a=pav-kv Epic: none Release note: none Co-authored-by: Pavel Kalinnikov <[email protected]>
2 parents e12309d + 4da9105 commit a2a9900

File tree

2 files changed

+9
-15
lines changed

2 files changed

+9
-15
lines changed

pkg/kv/kvserver/replica_gc_queue.go

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -306,16 +306,10 @@ func (rgcq *replicaGCQueue) process(
306306
log.VEventf(ctx, 1, "destroying local data")
307307

308308
nextReplicaID := replyDesc.NextReplicaID
309-
// Note that this seems racy - we didn't hold any locks between reading
310-
// the range descriptor above and deciding to remove the replica - but
311-
// we pass in the NextReplicaID to detect situations in which the
312-
// replica became "non-gc'able" in the meantime by checking (with raftMu
313-
// held throughout) whether the replicaID is still smaller than the
314-
// NextReplicaID. Given non-zero replica IDs don't change, this is only
315-
// possible if we currently think we're processing a pre-emptive snapshot
316-
// but discover in RemoveReplica that this range has since been added and
317-
// knows that.
318-
if err := repl.store.RemoveReplica(ctx, repl, nextReplicaID, "MVCC GC queue"); err != nil {
309+
// NB: since we didn't hold any locks between reading the range descriptor
310+
// above and deciding to remove the replica, it could have been removed
311+
// concurrently. RemoveReplica gracefully returns nil in this case.
312+
if err := repl.store.RemoveReplica(ctx, repl, nextReplicaID, "replica GC queue"); err != nil {
319313
// Should never get an error from RemoveReplica.
320314
const format = "error during replicaGC: %v"
321315
logcrash.ReportOrPanic(ctx, &repl.store.ClusterSettings().SV, format, err)
@@ -358,7 +352,7 @@ func (rgcq *replicaGCQueue) process(
358352
// we know the range to have been merged. See the Merge case of
359353
// runPreApplyTriggers() for details.
360354
if err := repl.store.RemoveReplica(
361-
ctx, repl, mergedTombstoneReplicaID, "dangling subsume via MVCC GC queue",
355+
ctx, repl, mergedTombstoneReplicaID, "dangling subsume via replica GC queue",
362356
); err != nil {
363357
return false, err
364358
}

pkg/kv/kvserver/store_remove_replica.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ type RemoveOptions struct {
2727
}
2828

2929
// RemoveReplica removes the replica from the store's replica map and from the
30-
// sorted replicasByKey btree.
30+
// sorted replicasByKey btree. If the replica is already removed, returns nil.
3131
//
32-
// The NextReplicaID from the replica descriptor that was used to make the
33-
// removal decision is passed in. Removal is aborted if the replica ID has
34-
// advanced to or beyond the NextReplicaID since the removal decision was made.
32+
// The passed in NextReplicaID is taken from the replica descriptor used to make
33+
// the removal decision. It is written into the RangeTombstone and prevents
34+
// historical replicas (including this one) from appearing on this store again.
3535
//
3636
// The passed replica must be initialized.
3737
func (s *Store) RemoveReplica(

0 commit comments

Comments
 (0)