Skip to content

Commit ea93ddd

Browse files
committed
kvserver: move destroyStatus to shMu
This is to emphasize that both raftMu and mu need to be locked to set the destroy status; and that either of the mutexes can be held to read it. Technically, we must also hold readOnlyCmdMu to set this field, so it fits neither of Replica.{mu,shMu} section. But shMu is strictly closer by semantics. Epic: none Release note: none
1 parent a9685b0 commit ea93ddd

16 files changed

+35
-35
lines changed

pkg/kv/kvserver/replica.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,14 @@ type Replica struct {
571571
//
572572
// TODO(pav-kv): audit all other fields and include here.
573573
shMu struct {
574+
// The destroyed status of a replica indicating if it's alive, corrupt,
575+
// scheduled for destruction or has been GCed. destroyStatus should only be
576+
// set while also holding the raftMu and readOnlyCmdMu.
577+
//
578+
// When this replica is being removed, the destroyStatus is updated and
579+
// RangeTombstone is written in the same raftMu critical section.
580+
destroyStatus
581+
574582
// The state of the Raft state machine.
575583
// Invariant: state.TruncatedState == nil. The field is being phased out in
576584
// favour of the one contained in logStorage.
@@ -587,14 +595,6 @@ type Replica struct {
587595
mu struct {
588596
// Protects all fields in the mu struct.
589597
ReplicaMutex
590-
// The destroyed status of a replica indicating if it's alive, corrupt,
591-
// scheduled for destruction or has been GCed.
592-
// destroyStatus should only be set while also holding the raftMu and
593-
// readOnlyCmdMu.
594-
//
595-
// When this replica is being removed, the destroyStatus is updated and
596-
// RangeTombstone is written in the same raftMu critical section.
597-
destroyStatus
598598
// Is the range quiescent? Quiescent ranges are not Tick()'d and unquiesce
599599
// whenever a Raft operation is performed.
600600
//
@@ -1250,7 +1250,7 @@ func (r *Replica) IsDestroyed() (DestroyReason, error) {
12501250
}
12511251

12521252
func (r *Replica) isDestroyedRLocked() (DestroyReason, error) {
1253-
return r.mu.destroyStatus.reason, r.mu.destroyStatus.err
1253+
return r.shMu.destroyStatus.reason, r.shMu.destroyStatus.err
12541254
}
12551255

12561256
// IsQuiescent returns whether the replica is quiescent or not.
@@ -2672,11 +2672,11 @@ func (r *Replica) maybeWatchForMergeLocked(ctx context.Context) (bool, error) {
26722672
r.raftMu.Lock()
26732673
r.readOnlyCmdMu.Lock()
26742674
r.mu.Lock()
2675-
if mergeCommitted && r.mu.destroyStatus.IsAlive() {
2675+
if mergeCommitted && r.shMu.destroyStatus.IsAlive() {
26762676
// The merge committed but the left-hand replica on this store hasn't
26772677
// subsumed this replica yet. Mark this replica as destroyed so it
26782678
// doesn't serve requests when we close the mergeCompleteCh below.
2679-
r.mu.destroyStatus.Set(kvpb.NewRangeNotFoundError(r.RangeID, r.store.StoreID()), destroyReasonMergePending)
2679+
r.shMu.destroyStatus.Set(kvpb.NewRangeNotFoundError(r.RangeID, r.store.StoreID()), destroyReasonMergePending)
26802680
}
26812681
// Unblock pending requests. If the merge committed, the requests will
26822682
// notice that the replica has been destroyed and return an appropriate

pkg/kv/kvserver/replica_app_batch.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ func (b *replicaAppBatch) runPostAddTriggersReplicaOnly(
353353
// commits by handleMergeResult() to finish the removal.
354354
rhsRepl.readOnlyCmdMu.Lock()
355355
rhsRepl.mu.Lock()
356-
rhsRepl.mu.destroyStatus.Set(
356+
rhsRepl.shMu.destroyStatus.Set(
357357
kvpb.NewRangeNotFoundError(rhsRepl.RangeID, rhsRepl.store.StoreID()),
358358
destroyReasonRemoved)
359359
rhsRepl.mu.Unlock()
@@ -443,7 +443,7 @@ func (b *replicaAppBatch) runPostAddTriggersReplicaOnly(
443443
// application.
444444
b.r.readOnlyCmdMu.Lock()
445445
b.r.mu.Lock()
446-
b.r.mu.destroyStatus.Set(
446+
b.r.shMu.destroyStatus.Set(
447447
kvpb.NewRangeNotFoundError(b.r.RangeID, b.r.store.StoreID()),
448448
destroyReasonRemoved)
449449
span := b.r.descRLocked().RSpan()

pkg/kv/kvserver/replica_application_state_machine_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ func TestReplicaStateMachineChangeReplicas(t *testing.T) {
153153
defer r.readOnlyCmdMu.Unlock()
154154
r.mu.Lock()
155155
defer r.mu.Unlock()
156-
r.mu.destroyStatus.Set(errors.New("test done"), destroyReasonRemoved)
156+
r.shMu.destroyStatus.Set(errors.New("test done"), destroyReasonRemoved)
157157
})
158158
}
159159

@@ -241,7 +241,7 @@ func TestReplicaStateMachineRaftLogTruncationStronglyCoupled(t *testing.T) {
241241
r.readOnlyCmdMu.Lock()
242242
r.mu.Lock()
243243
defer r.mu.Unlock()
244-
r.mu.destroyStatus.Set(errors.New("test done"), destroyReasonRemoved)
244+
r.shMu.destroyStatus.Set(errors.New("test done"), destroyReasonRemoved)
245245
r.readOnlyCmdMu.Unlock()
246246

247247
require.Equal(t, raftAppliedIndex+1, r.shMu.state.RaftAppliedIndex)
@@ -436,7 +436,7 @@ func TestReplicaStateMachineEphemeralAppBatchRejection(t *testing.T) {
436436
defer r.readOnlyCmdMu.Unlock()
437437
r.mu.Lock()
438438
defer r.mu.Unlock()
439-
r.mu.destroyStatus.Set(errors.New("boom"), destroyReasonRemoved)
439+
r.shMu.destroyStatus.Set(errors.New("boom"), destroyReasonRemoved)
440440
}()
441441

442442
sm := r.getStateMachine()

pkg/kv/kvserver/replica_command_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -682,7 +682,7 @@ func TestWaitForLeaseAppliedIndex(t *testing.T) {
682682
defer tc.repl.readOnlyCmdMu.Unlock()
683683
tc.repl.mu.Lock()
684684
defer tc.repl.mu.Unlock()
685-
tc.repl.mu.destroyStatus.Set(destroyErr, destroyReasonRemoved)
685+
tc.repl.shMu.destroyStatus.Set(destroyErr, destroyReasonRemoved)
686686
}()
687687

688688
_, err = tc.repl.WaitForLeaseAppliedIndex(ctx, maxLAI)

pkg/kv/kvserver/replica_corruption.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ func (r *Replica) setCorruptRaftMuLocked(
4343

4444
log.Dev.ErrorfDepth(ctx, 1, "stalling replica due to: %s", cErr.ErrorMsg)
4545
cErr.Processed = true
46-
r.mu.destroyStatus.Set(cErr, destroyReasonRemoved)
46+
r.shMu.destroyStatus.Set(cErr, destroyReasonRemoved)
4747

4848
auxDir := r.store.TODOEngine().GetAuxiliaryDir()
4949
_ = r.store.TODOEngine().Env().MkdirAll(auxDir, os.ModePerm)

pkg/kv/kvserver/replica_destroy.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ func (r *Replica) disconnectReplicationRaftMuLocked(ctx context.Context) {
167167
p.finishApplication(ctx, makeProposalResultErr(kvpb.NewAmbiguousResultError(apply.ErrRemoved)))
168168
}
169169

170-
if !r.mu.destroyStatus.Removed() {
170+
if !r.shMu.destroyStatus.Removed() {
171171
log.Dev.Fatalf(ctx, "removing raft group before destroying replica %s", r)
172172
}
173173
r.mu.internalRaftGroup = nil

pkg/kv/kvserver/replica_proposal_buf.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1116,7 +1116,7 @@ func (rp *replicaProposer) getReplicaID() roachpb.ReplicaID {
11161116
}
11171117

11181118
func (rp *replicaProposer) destroyed() destroyStatus {
1119-
return rp.mu.destroyStatus
1119+
return rp.shMu.destroyStatus
11201120
}
11211121

11221122
func (rp *replicaProposer) leaseAppliedIndex() kvpb.LeaseAppliedIndex {

pkg/kv/kvserver/replica_raft.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1648,7 +1648,7 @@ func (r *Replica) refreshProposalsLocked(
16481648
}
16491649

16501650
r.mu.slowProposalCount = slowProposalCount
1651-
destroyed := r.mu.destroyStatus.Removed()
1651+
destroyed := r.shMu.destroyStatus.Removed()
16521652

16531653
// If the breaker isn't tripped yet but we've detected commands that have
16541654
// taken too long to replicate, and the replica is not destroyed, trip the
@@ -2222,7 +2222,7 @@ func (s pendingCmdSlice) Less(i, j int) bool {
22222222
func (r *Replica) withRaftGroupLocked(
22232223
f func(r *raft.RawNode) (unquiesceAndWakeLeader bool, _ error),
22242224
) error {
2225-
if r.mu.destroyStatus.Removed() {
2225+
if r.shMu.destroyStatus.Removed() {
22262226
// Callers know to detect errRemoved as non-fatal.
22272227
return errRemoved
22282228
}

pkg/kv/kvserver/replica_raftstorage.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,7 @@ func (r *Replica) applySnapshotRaftMuLocked(
577577
// erroneously return empty data.
578578
sr.readOnlyCmdMu.Lock()
579579
sr.mu.Lock()
580-
sr.mu.destroyStatus.Set(
580+
sr.shMu.destroyStatus.Set(
581581
kvpb.NewRangeNotFoundError(sr.RangeID, sr.store.StoreID()),
582582
destroyReasonRemoved)
583583
sr.mu.Unlock()

pkg/kv/kvserver/replica_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13786,7 +13786,7 @@ func TestAdminScatterDestroyedReplica(t *testing.T) {
1378613786
defer tc.repl.readOnlyCmdMu.Unlock()
1378713787
tc.repl.mu.Lock()
1378813788
defer tc.repl.mu.Unlock()
13789-
tc.repl.mu.destroyStatus.Set(errBoom, destroyReasonMergePending)
13789+
tc.repl.shMu.destroyStatus.Set(errBoom, destroyReasonMergePending)
1379013790
}()
1379113791

1379213792
desc := tc.repl.Desc()

0 commit comments

Comments
 (0)