Skip to content

Commit 4792bba

Browse files
committed
kvstorage: assert each engine only access its spans
The commit adds engine assertions to assert that Log engine only accesses log engine spans, and state engine only accesses state engine spans.
1 parent a37bd6e commit 4792bba

File tree

10 files changed

+323
-287
lines changed

10 files changed

+323
-287
lines changed

pkg/kv/kvserver/kvstorage/BUILD.bazel

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ go_library(
2121
"//pkg/kv/kvserver/kvserverpb",
2222
"//pkg/kv/kvserver/logstore",
2323
"//pkg/kv/kvserver/rditer",
24+
"//pkg/kv/kvserver/spanset",
2425
"//pkg/raft/raftpb",
2526
"//pkg/roachpb",
2627
"//pkg/storage",
2728
"//pkg/storage/enginepb",
2829
"//pkg/storage/fs",
30+
"//pkg/util",
2931
"//pkg/util/buildutil",
3032
"//pkg/util/hlc",
3133
"//pkg/util/iterutil",
@@ -45,6 +47,7 @@ go_test(
4547
"init_test.go",
4648
"initial_test.go",
4749
"stateloader_test.go",
50+
"storage_test.go",
4851
],
4952
data = glob(["testdata/**"]),
5053
embed = [":kvstorage"],
@@ -56,6 +59,7 @@ go_test(
5659
"//pkg/kv/kvserver/kvserverpb",
5760
"//pkg/kv/kvserver/logstore",
5861
"//pkg/kv/kvserver/print",
62+
"//pkg/kv/kvserver/spanset",
5963
"//pkg/raft/raftpb",
6064
"//pkg/roachpb",
6165
"//pkg/settings/cluster",

pkg/kv/kvserver/kvstorage/destroy_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
1919
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore"
2020
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/print"
21+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
2122
"github.com/cockroachdb/cockroach/pkg/raft/raftpb"
2223
"github.com/cockroachdb/cockroach/pkg/roachpb"
2324
"github.com/cockroachdb/cockroach/pkg/storage"
@@ -139,7 +140,7 @@ func (r *replicaInfo) createStateMachine(ctx context.Context, t *testing.T, w st
139140
// TODO(pav-kv): figure out whether LastReplicaGCTimestamp should be in the
140141
// log or state engine.
141142
require.NoError(t, storage.MVCCBlindPutProto(
142-
ctx, w,
143+
ctx, spanset.DisableWriterAssertions(w),
143144
keys.RangeLastReplicaGCTimestampKey(r.id.RangeID),
144145
hlc.Timestamp{}, /* timestamp */
145146
&hlc.Timestamp{WallTime: 12345678},

pkg/kv/kvserver/kvstorage/storage.go

Lines changed: 149 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,13 @@
66
package kvstorage
77

88
import (
9+
"github.com/cockroachdb/cockroach/pkg/keys"
10+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
11+
"github.com/cockroachdb/cockroach/pkg/roachpb"
912
"github.com/cockroachdb/cockroach/pkg/storage"
13+
"github.com/cockroachdb/cockroach/pkg/util"
1014
"github.com/cockroachdb/cockroach/pkg/util/buildutil"
15+
"github.com/cockroachdb/errors"
1116
)
1217

1318
// The following are type aliases that help annotating various storage
@@ -121,8 +126,15 @@ type Engines struct {
121126
// MakeEngines creates an Engines handle in which both state machine and log
122127
// engine reside in the same physical engine.
123128
func MakeEngines(eng storage.Engine) Engines {
124-
// TODO(#158281): in test builds, wrap the engines in a way that allows
125-
// verifying that all accesses touch the correct subset of keys.
129+
if util.RaceEnabled {
130+
// Wrap the engines with span set engines to catch incorrect engine
131+
// accesses.
132+
return Engines{
133+
stateEngine: spanset.NewEngine(eng, validateIsStateEngineSpan),
134+
logEngine: spanset.NewEngine(eng, validateIsRaftEngineSpan),
135+
todoEngine: eng,
136+
}
137+
}
126138
return Engines{
127139
stateEngine: eng,
128140
todoEngine: eng,
@@ -137,6 +149,16 @@ func MakeSeparatedEnginesForTesting(state, log storage.Engine) Engines {
137149
if !buildutil.CrdbTestBuild {
138150
panic("separated engines are not supported")
139151
}
152+
if util.RaceEnabled {
153+
// Wrap the engines with span set engines to catch incorrect engine
154+
// accesses.
155+
return Engines{
156+
stateEngine: spanset.NewEngine(state, validateIsStateEngineSpan),
157+
todoEngine: nil,
158+
logEngine: spanset.NewEngine(log, validateIsRaftEngineSpan),
159+
separated: true,
160+
}
161+
}
140162
return Engines{
141163
stateEngine: state,
142164
todoEngine: nil,
@@ -178,3 +200,128 @@ func (e *Engines) TODOEngine() storage.Engine {
178200
func (e *Engines) Separated() bool {
179201
return e.separated
180202
}
203+
204+
// validateIsStateEngineSpan asserts that the provided span only overlaps with
205+
// keys in the State engine and returns an error if not.
206+
// Note that we could receive the span with a nil startKey, which has a special
207+
// meaning that the span represents: [endKey.Prev(), endKey).
208+
func validateIsStateEngineSpan(span spanset.TrickySpan) error {
209+
// If the provided span overlaps with local store span, it cannot be a
210+
// StateEngine span because Store-local keys belong to the LogEngine.
211+
if spanset.Overlaps(roachpb.Span{
212+
Key: keys.LocalStorePrefix,
213+
EndKey: keys.LocalStoreMax,
214+
}, span) {
215+
return errors.Errorf("overlaps with store local keys")
216+
}
217+
218+
// If the provided span is completely outside the rangeID local spans for any
219+
// rangeID, then there is no overlap with any rangeID local keys.
220+
fullRangeIDLocalSpans := roachpb.Span{
221+
Key: keys.LocalRangeIDPrefix.AsRawKey(),
222+
EndKey: keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd(),
223+
}
224+
if !spanset.Overlaps(fullRangeIDLocalSpans, span) {
225+
return nil
226+
}
227+
228+
// At this point, we know that we overlap with fullRangeIDLocalSpans. If we
229+
// are not completely within fullRangeIDLocalSpans, return an error as we
230+
// make an assumption that spans should respect the local RangeID tree
231+
// structure, and that spans that partially overlaps with
232+
// fullRangeIDLocalSpans don't make logical sense.
233+
if !spanset.Contains(fullRangeIDLocalSpans, span) {
234+
return errors.Errorf("overlapping an unreplicated rangeID key")
235+
}
236+
237+
// If the span in inside fullRangeIDLocalSpans, we expect that both start and
238+
// end keys should be in the same rangeID.
239+
rangeIDKey := span.Key
240+
if rangeIDKey == nil {
241+
rangeIDKey = span.EndKey
242+
}
243+
rangeID, err := keys.DecodeRangeIDPrefix(rangeIDKey)
244+
if err != nil {
245+
return errors.NewAssertionErrorWithWrappedErrf(err,
246+
"could not decode range ID for span: %s", span)
247+
}
248+
249+
// If the span is inside RangeIDLocalSpans but outside RangeIDUnreplicated,
250+
// it cannot overlap local raft keys.
251+
rangeIDPrefixBuf := keys.MakeRangeIDPrefixBuf(rangeID)
252+
if !spanset.Overlaps(roachpb.Span{
253+
Key: rangeIDPrefixBuf.UnreplicatedPrefix(),
254+
EndKey: rangeIDPrefixBuf.UnreplicatedPrefix().PrefixEnd(),
255+
}, span) {
256+
return nil
257+
}
258+
259+
// RangeTombstoneKey and RaftReplicaIDKey belong to the StateEngine, and can
260+
// be accessed as point keys.
261+
if roachpb.Span(span).Equal(roachpb.Span{
262+
Key: rangeIDPrefixBuf.RangeTombstoneKey(),
263+
}) {
264+
return nil
265+
}
266+
267+
if roachpb.Span(span).Equal(roachpb.Span{
268+
Key: rangeIDPrefixBuf.RaftReplicaIDKey(),
269+
}) {
270+
return nil
271+
}
272+
273+
return errors.Errorf("overlapping an unreplicated rangeID span")
274+
}
275+
276+
// validateIsRaftEngineSpan asserts that the provided span only overlaps with
277+
// keys in the Raft engine and returns an error if not.
278+
// Note that we could receive the span with a nil startKey, which has a special
279+
// meaning that the span represents: [endKey.Prev(), endKey).
280+
func validateIsRaftEngineSpan(span spanset.TrickySpan) error {
281+
// The LogEngine owns only Store-local and RangeID-local raft keys. A span
282+
// inside Store-local is correct. If it's only partially inside, an error is
283+
// returned below, as part of checking RangeID-local spans.
284+
if spanset.Contains(roachpb.Span{
285+
Key: keys.LocalStorePrefix,
286+
EndKey: keys.LocalStoreMax,
287+
}, span) {
288+
return nil
289+
}
290+
291+
// At this point, the remaining possible LogEngine keys are inside
292+
// LocalRangeID spans. If the span is not completely inside it, it must
293+
// overlap with some StateEngine keys.
294+
if !spanset.Contains(roachpb.Span{
295+
Key: keys.LocalRangeIDPrefix.AsRawKey(),
296+
EndKey: keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd(),
297+
}, span) {
298+
return errors.Errorf("overlaps with state engine keys")
299+
}
300+
301+
// If the span in inside LocalRangeID, we assume that both start and
302+
// end keys should be in the same rangeID.
303+
rangeIDKey := span.Key
304+
if rangeIDKey == nil {
305+
rangeIDKey = span.EndKey
306+
}
307+
rangeID, err := keys.DecodeRangeIDPrefix(rangeIDKey)
308+
if err != nil {
309+
return errors.NewAssertionErrorWithWrappedErrf(err,
310+
"could not decode range ID for span: %s", span)
311+
}
312+
rangeIDPrefixBuf := keys.MakeRangeIDPrefixBuf(rangeID)
313+
if !spanset.Contains(roachpb.Span{
314+
Key: rangeIDPrefixBuf.UnreplicatedPrefix(),
315+
EndKey: rangeIDPrefixBuf.UnreplicatedPrefix().PrefixEnd(),
316+
}, span) {
317+
return errors.Errorf("overlaps with state engine keys")
318+
}
319+
if spanset.Overlaps(roachpb.Span{Key: rangeIDPrefixBuf.RangeTombstoneKey()}, span) {
320+
return errors.Errorf("overlaps with state engine keys")
321+
}
322+
if spanset.Overlaps(roachpb.Span{Key: rangeIDPrefixBuf.RaftReplicaIDKey()}, span) {
323+
return errors.Errorf("overlaps with state engine keys")
324+
}
325+
326+
return nil
327+
}
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package kvstorage
7+
8+
import (
9+
"testing"
10+
11+
"github.com/cockroachdb/cockroach/pkg/keys"
12+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
13+
"github.com/cockroachdb/cockroach/pkg/roachpb"
14+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
15+
"github.com/cockroachdb/cockroach/pkg/util/log"
16+
"github.com/stretchr/testify/require"
17+
)
18+
19+
func TestValidateIsStateEngineSpan(t *testing.T) {
20+
defer leaktest.AfterTest(t)()
21+
defer log.Scope(t).Close(t)
22+
23+
s := func(start, end roachpb.Key) roachpb.Span {
24+
return roachpb.Span{Key: start, EndKey: end}
25+
}
26+
testCases := []struct {
27+
span roachpb.Span
28+
notOk bool
29+
}{
30+
// Full state engine spans.
31+
{span: s(roachpb.KeyMin, keys.LocalRangeIDPrefix.AsRawKey())},
32+
{span: s(keys.RangeForceFlushKey(1), keys.RangeLeaseKey(1))},
33+
{span: s(keys.LocalStoreMax, roachpb.KeyMax)},
34+
35+
// Full non-state engine spans.
36+
{span: s(roachpb.KeyMin, keys.MakeRangeIDUnreplicatedPrefix(1)), notOk: true}, // partial overlap
37+
{span: s(roachpb.KeyMin, roachpb.Key(keys.LocalStorePrefix).Next()), notOk: true},
38+
{span: s(roachpb.KeyMin, keys.RaftTruncatedStateKey(1)), notOk: true},
39+
{span: s(keys.LocalRangeIDPrefix.AsRawKey(), keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd()),
40+
notOk: true},
41+
{span: s(keys.RaftTruncatedStateKey(1), keys.RaftTruncatedStateKey(2)), notOk: true},
42+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd(), roachpb.KeyMax), notOk: true}, // partial overlap
43+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1),
44+
keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd()), notOk: true},
45+
{span: s(keys.RangeTombstoneKey(1), keys.RaftTruncatedStateKey(1)), notOk: true},
46+
{span: s(keys.RaftReplicaIDKey(1), keys.RaftTruncatedStateKey(1)), notOk: true},
47+
{span: s(keys.RaftTruncatedStateKey(1), roachpb.KeyMax), notOk: true},
48+
{span: s(keys.LocalStorePrefix, keys.LocalStoreMax), notOk: true},
49+
{span: s(keys.StoreGossipKey(), keys.StoreIdentKey()), notOk: true},
50+
{span: s(keys.LocalStoreMax.Prevish(1), roachpb.KeyMax), notOk: true},
51+
52+
// Point state engine spans.
53+
{span: s(roachpb.KeyMin, nil)},
54+
{span: s(keys.LocalRangeIDPrefix.AsRawKey().Prevish(1), nil)},
55+
{span: s(keys.RangeForceFlushKey(1), nil)},
56+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1).Prevish(1), nil)},
57+
{span: s(keys.RangeTombstoneKey(1), nil)},
58+
{span: s(keys.RaftReplicaIDKey(1), nil)},
59+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd(), nil)},
60+
{span: s(keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd(), nil)},
61+
{span: s(roachpb.Key(keys.LocalStorePrefix).Prevish(1), nil)},
62+
{span: s(keys.LocalStoreMax, nil)},
63+
{span: s(keys.LocalStoreMax.Next(), nil)},
64+
{span: s(roachpb.KeyMax, nil)},
65+
66+
// Point non-state engine spans.
67+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1), nil), notOk: true},
68+
{span: s(keys.RaftTruncatedStateKey(1), nil), notOk: true},
69+
{span: s(keys.RaftTruncatedStateKey(2), nil), notOk: true},
70+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd().Prevish(1), nil), notOk: true},
71+
{span: s(keys.LocalStorePrefix, nil), notOk: true},
72+
{span: s(keys.StoreGossipKey(), nil), notOk: true},
73+
{span: s(keys.LocalStoreMax.Prevish(1), nil), notOk: true},
74+
75+
// Tricky state engine spans.
76+
{span: s(nil, keys.LocalRangeIDPrefix.AsRawKey())},
77+
{span: s(nil, keys.MakeRangeIDUnreplicatedPrefix(1))},
78+
{span: s(nil, keys.RangeForceFlushKey(1))},
79+
{span: s(nil, keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd().Next())},
80+
{span: s(nil, keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd().Next())},
81+
{span: s(nil, keys.LocalStorePrefix)},
82+
{span: s(nil, keys.LocalStoreMax.Next())},
83+
84+
// Tricky non-state engine spans.
85+
{span: s(nil, keys.MakeRangeIDUnreplicatedPrefix(1).Next()), notOk: true},
86+
{span: s(nil, keys.RaftTruncatedStateKey(1).Next()), notOk: true},
87+
{span: s(nil, keys.RaftTruncatedStateKey(2).Next()), notOk: true},
88+
{span: s(nil, keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd()), notOk: true},
89+
{span: s(nil, keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd()), notOk: true}, // can't decode RangeID.
90+
{span: s(nil, roachpb.Key(keys.LocalStorePrefix).Next()), notOk: true},
91+
{span: s(nil, keys.StoreGossipKey()), notOk: true},
92+
{span: s(nil, keys.LocalStoreMax), notOk: true},
93+
}
94+
95+
for _, tc := range testCases {
96+
t.Run("", func(t *testing.T) {
97+
err := validateIsStateEngineSpan(spanset.TrickySpan(tc.span))
98+
require.Equal(t, tc.notOk, err != nil, tc.span)
99+
})
100+
}
101+
}
102+
103+
func TestValidateIsRaftEngineSpan(t *testing.T) {
104+
defer leaktest.AfterTest(t)()
105+
defer log.Scope(t).Close(t)
106+
107+
s := func(start, end roachpb.Key) roachpb.Span {
108+
return roachpb.Span{Key: start, EndKey: end}
109+
}
110+
testCases := []struct {
111+
span roachpb.Span
112+
notOk bool
113+
}{
114+
// Full spans not overlapping with state engine spans.
115+
{span: s(keys.RangeTombstoneKey(1).Next(), keys.RaftReplicaIDKey(1))},
116+
{span: s(keys.RaftReplicaIDKey(1).Next(), keys.RangeLastReplicaGCTimestampKey(1).Next())},
117+
{span: s(keys.LocalStorePrefix, keys.LocalStoreMax)},
118+
119+
// Full spans overlapping with state engine spans.
120+
{span: s(keys.MinKey, keys.LocalStorePrefix), notOk: true},
121+
{span: s(keys.RangeGCThresholdKey(1), keys.RangeVersionKey(1)), notOk: true},
122+
{span: s(keys.RangeTombstoneKey(1), keys.RaftReplicaIDKey(1)), notOk: true},
123+
{span: s(keys.RaftReplicaIDKey(1), keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd()), notOk: true},
124+
{span: s(keys.RangeGCThresholdKey(1), keys.RangeGCThresholdKey(2)), notOk: true},
125+
{span: s(keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd(), keys.LocalStorePrefix), notOk: true},
126+
{span: s(keys.LocalStoreMax, keys.MaxKey), notOk: true},
127+
128+
// Point spans not overlapping with state engine spans.
129+
{span: s(keys.RaftLogKey(1, 1), nil)},
130+
{span: s(keys.RangeTombstoneKey(1).Next(), nil)},
131+
{span: s(keys.RaftReplicaIDKey(1).Next(), nil)},
132+
{span: s(keys.RaftTruncatedStateKey(1).Next(), nil)},
133+
{span: s(keys.LocalStorePrefix, nil)},
134+
{span: s(roachpb.Key(keys.LocalStorePrefix).Next(), nil)},
135+
{span: s(keys.LocalStoreMax.Prevish(1), nil)},
136+
137+
// Point spans overlapping with state engine spans.
138+
{span: s(keys.LocalRangeIDPrefix.AsRawKey(), nil), notOk: true}, // invalid start key
139+
{span: s(keys.RangeLeaseKey(1), nil), notOk: true},
140+
{span: s(keys.RangeTombstoneKey(1), nil), notOk: true},
141+
{span: s(keys.RaftReplicaIDKey(1), nil), notOk: true},
142+
{span: s(keys.RangeTombstoneKey(2), nil), notOk: true},
143+
{span: s(keys.RaftReplicaIDKey(2), nil), notOk: true},
144+
{span: s(keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd(), nil), notOk: true},
145+
146+
// Tricky spans not overlapping with state engine spans.
147+
{span: s(nil, keys.RaftLogKey(1, 1))},
148+
{span: s(nil, keys.RangeTombstoneKey(1))},
149+
{span: s(nil, keys.RaftReplicaIDKey(1))},
150+
{span: s(nil, roachpb.Key(keys.LocalStorePrefix).PrefixEnd())},
151+
152+
// Tricky spans overlapping with state engine spans.
153+
{span: s(nil, keys.LocalRangeIDPrefix.AsRawKey()), notOk: true},
154+
{span: s(nil, keys.LocalStorePrefix), notOk: true},
155+
{span: s(nil, keys.RangeLeaseKey(1)), notOk: true},
156+
{span: s(nil, keys.RangeTombstoneKey(1).Next()), notOk: true},
157+
{span: s(nil, keys.RaftReplicaIDKey(1).Next()), notOk: true},
158+
{span: s(nil, roachpb.Key(keys.LocalStorePrefix).PrefixEnd().Next()), notOk: true},
159+
}
160+
161+
for _, tc := range testCases {
162+
t.Run("", func(t *testing.T) {
163+
err := validateIsRaftEngineSpan(spanset.TrickySpan(tc.span))
164+
require.Equal(t, tc.notOk, err != nil, tc.span)
165+
})
166+
}
167+
}

0 commit comments

Comments
 (0)