@@ -417,6 +417,7 @@ type Replica struct {
417417 ReplicaID roachpb.ReplicaID
418418 Desc * roachpb.RangeDescriptor // nil for uninitialized Replica
419419
420+ tombstone kvserverpb.RangeTombstone
420421 hardState raftpb.HardState // internal to kvstorage, see migration in LoadAndReconcileReplicas
421422}
422423
@@ -463,9 +464,12 @@ func (m replicaMap) getOrMake(rangeID roachpb.RangeID) Replica {
463464 return ent
464465}
465466
466- func (m replicaMap ) setReplicaID (rangeID roachpb.RangeID , replicaID roachpb.ReplicaID ) {
467+ func (m replicaMap ) setReplicaIDAndTombstone (
468+ rangeID roachpb.RangeID , replicaID roachpb.ReplicaID , ts kvserverpb.RangeTombstone ,
469+ ) {
467470 ent := m .getOrMake (rangeID )
468471 ent .ReplicaID = replicaID
472+ ent .tombstone = ts
469473 m [rangeID ] = ent
470474}
471475
@@ -508,9 +512,16 @@ func loadReplicas(ctx context.Context, eng storage.Engine) ([]Replica, error) {
508512 }
509513 }
510514
511- // Load replicas from disk based on their RaftReplicaID and HardState.
515+ // Scan all RangeIDs present on the store, and load ReplicaID and HardState of
516+ // those that correspond to an existing replica (uninitialized or initialized).
512517 //
513- // INVARIANT: all replicas have a persisted full ReplicaID (i.e. a "ReplicaID from disk").
518+ // INVARIANT: a RangeID with no replica can only have a RangeTombstone key.
519+ // INVARIANT: all replicas have a persisted ReplicaID.
520+ // INVARIANT: ReplicaID >= RangeTombstone.NextReplicaID.
521+ //
522+ // NB: RangeIDs that only have a RangeTombstone are effectively skipped here
523+ // as uninteresting. If a non-RangeTombstone key is found for a RangeID, there
524+ // must be a replica of this range, so we load it and check invariants later.
514525 //
515526 // TODO(tbg): tighten up the case where we see a RaftReplicaID but no HardState.
516527 // This leads to the general desire to validate the internal consistency of the
@@ -522,17 +533,28 @@ func loadReplicas(ctx context.Context, eng storage.Engine) ([]Replica, error) {
522533 log .KvExec .Infof (ctx , "loaded state for %d/%d replicas" , i , len (s ))
523534 }
524535 i ++
536+ // NB: the keys must be requested in sorted order here.
537+ buf := keys .MakeRangeIDPrefixBuf (id )
538+ var ts kvserverpb.RangeTombstone
539+ if ok , err := get (buf .RangeTombstoneKey (), & ts ); err != nil {
540+ return err
541+ } else if ! ok {
542+ ts = kvserverpb.RangeTombstone {} // just in case it was mutated
543+ }
544+ // NB: the keys must be requested in sorted order here.
525545 var hs raftpb.HardState
526- if ok , err := get (keys .RaftHardStateKey (id ), & hs ); err != nil {
546+ if ok , err := get (buf .RaftHardStateKey (), & hs ); err != nil {
527547 return err
528548 } else if ok {
529549 s .setHardState (id , hs )
530550 }
551+ // NB: the keys must be requested in sorted order here.
531552 var rID kvserverpb.RaftReplicaID
532- if ok , err := get (keys .RaftReplicaIDKey (id ), & rID ); err != nil {
553+ if ok , err := get (buf .RaftReplicaIDKey (), & rID ); err != nil {
533554 return err
534555 } else if ok {
535- s .setReplicaID (id , rID .ReplicaID )
556+ // NB: the tombstone can be empty.
557+ s .setReplicaIDAndTombstone (id , rID .ReplicaID , ts )
536558 }
537559 return nil
538560 }); err != nil {
@@ -579,6 +601,12 @@ func LoadAndReconcileReplicas(ctx context.Context, eng storage.Engine) ([]Replic
579601 if repl .ReplicaID == 0 {
580602 return nil , errors .AssertionFailedf ("no RaftReplicaID for %s" , repl .Desc )
581603 }
604+ // INVARIANT: ReplicaID >= RangeTombstone.NextReplicaID.
605+ if repl .ReplicaID < repl .tombstone .NextReplicaID {
606+ return nil , errors .AssertionFailedf (
607+ "r%d: RaftReplicaID %d survived RangeTombstone %+v" ,
608+ repl .RangeID , repl .ReplicaID , repl .tombstone )
609+ }
582610
583611 if repl .Desc != nil {
584612 // INVARIANT: a Replica's RangeDescriptor always contains the local Store,
0 commit comments