kvserver: comment and style updates

tbg · tbg · commit 45ac58bcb1f6 · 2025-06-10T09:46:03.000+02:00
diff --git a/pkg/kv/kvserver/replica_application_result.go b/pkg/kv/kvserver/replica_application_result.go
@@ -506,7 +506,9 @@ func (r *Replica) stagePendingTruncationRaftMuLocked(pt pendingTruncation) {
 	r.asLogStorage().stagePendingTruncationRaftMuLocked(pt)
 }
 
-func (r *replicaLogStorage) stageApplySnapshot(truncState kvserverpb.RaftTruncatedState) {
+func (r *replicaLogStorage) stageApplySnapshotRaftMuLocked(
+	truncState kvserverpb.RaftTruncatedState,
+) {
 	r.raftMu.AssertHeld()
 
 	// A snapshot application implies a log truncation to the snapshot's index,
@@ -522,33 +524,20 @@ func (r *replicaLogStorage) stageApplySnapshot(truncState kvserverpb.RaftTruncat
 	// section but before the clear will see an empty log anyway, since the
 	// in-memory state is already updated to reflect the truncation, even if
 	// entries are still present in the cache.
-	//
-	// NB: a reader that obtained bounds pre-critical section might be able to
-	// load entries, though, and could repopulate the cache after it has been
-	// cleared - the cache is not "snapshotted". Ideally, mu-only readers simply
-	// cannot populate the cache.
 	defer r.cache.Drop(r.ls.RangeID)
 
 	r.mu.Lock()
 	defer r.mu.Unlock()
 
-	// Raft never accepts a snapshot that does not increase the commit index, and
-	// the commit index always refers to a log entry (unless the log is empty
-	// already). In particular, any entries in the log are guaranteed to be at
-	// indexes that this truncation will remove, and the result is an empty log
-	// (and raft entry cache). This is true even if the RawNode has entries lined
-	// up that it wants to append to the log[1] (on top of the snapshot), as these
-	// entries are not yet stable and thus not in the log/cache yet.
+	// When a snapshot is applied, the entire log is cleared. This is safe:
+	// - log entries up to and including the snapshot index are durable via
+	//   the snapshot itself, and
+	// - committed log entries ahead of the snapshot index were not acked by this
+	//   replica, or raft would not have accepted this snapshot.
 	//
-	// [1]: this is not properly supported yet and will currently fatal.
-	// See: https://github.com/cockroachdb/cockroach/pull/125530
-	// We also, in the same mu critical section, update the in-memory metadata
-	// accordingly before the change is visible on the engine. This means that
-	// even if someone used the in-memory state to grab an iterator (all within
-	// the same mu section), they would either see pre-snapshot raft log, or the
-	// post-snapshot (empty) log, but never any in-between state in which the
-	// first and last index are out of sync either with each other or with what's
-	// actually on the log engine.
+	// Here, we update the in-memory state to reflect this before making the
+	// corresponding change to the on-disk state. This ensures that concurrent
+	// readers don't try to access entries no longer present in the log.
 	r.updateStateRaftMuLockedMuLocked(logstore.RaftState{
 		LastIndex: truncState.Index,
 		LastTerm:  truncState.Term,
diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go
@@ -1282,7 +1282,6 @@ func (r *Replica) handleRaftReadyRaftMuLocked(
 		// pass both engines in.
 		sm.r.assertStateRaftMuLockedReplicaMuRLocked(ctx, sm.r.store.TODOEngine())
 		sm.r.mu.RUnlock()
-
 	}
 
 	if refreshReason != noReason {
diff --git a/pkg/kv/kvserver/replica_raftstorage.go b/pkg/kv/kvserver/replica_raftstorage.go
@@ -610,7 +610,7 @@ func (r *Replica) applySnapshotRaftMuLocked(
 
 	// Stage the truncation, so that in-memory state reflects an
 	// empty log.
-	ls.stageApplySnapshot(truncState)
+	ls.stageApplySnapshotRaftMuLocked(truncState)
 
 	stats.subsumedReplicas = timeutil.Now()
 
@@ -682,7 +682,7 @@ func (r *Replica) applySnapshotRaftMuLocked(
 			state.RaftAppliedIndexTerm, nonemptySnap.Metadata.Term)
 	}
 	if ls.shMu.size != 0 {
-		log.Fatalf(ctx, "expected empty raftLogSize after snapshot, got %d", ls.shMu.size)
+		log.Fatalf(ctx, "expected empty raft log after snapshot, got %d", ls.shMu.size)
 	}
 
 	// Read the prior read summary for this range, which was included in the

Original file line number	Diff line number	Diff line change
`@@ -1282,7 +1282,6 @@ func (r *Replica) handleRaftReadyRaftMuLocked(`
`1282`	`1282`	`// pass both engines in.`
`1283`	`1283`	`sm.r.assertStateRaftMuLockedReplicaMuRLocked(ctx, sm.r.store.TODOEngine())`
`1284`	`1284`	`sm.r.mu.RUnlock()`
`1285`		`-`
`1286`	`1285`	`}`
`1287`	`1286`
`1288`	`1287`	`if refreshReason != noReason {`
Original file line number	Diff line number	Diff line change
`@@ -610,7 +610,7 @@ func (r *Replica) applySnapshotRaftMuLocked(`
`610`	`610`
`611`	`611`	`// Stage the truncation, so that in-memory state reflects an`
`612`	`612`	`// empty log.`
`613`		`- ls.stageApplySnapshot(truncState)`
	`613`	`+ ls.stageApplySnapshotRaftMuLocked(truncState)`
`614`	`614`
`615`	`615`	`stats.subsumedReplicas = timeutil.Now()`
`616`	`616`
`@@ -682,7 +682,7 @@ func (r *Replica) applySnapshotRaftMuLocked(`
`682`	`682`	`state.RaftAppliedIndexTerm, nonemptySnap.Metadata.Term)`
`683`	`683`	`}`
`684`	`684`	`if ls.shMu.size != 0 {`
`685`		`- log.Fatalf(ctx, "expected empty raftLogSize after snapshot, got %d", ls.shMu.size)`
	`685`	`+ log.Fatalf(ctx, "expected empty raft log after snapshot, got %d", ls.shMu.size)`
`686`	`686`	`}`
`687`	`687`
`688`	`688`	`// Read the prior read summary for this range, which was included in the`