Skip to content

Commit af813e0

Browse files
committed
storage: throw error if ComputStatsDiff asumptions are invalid
ComputeStatsDiff can only compute accurate stats if an engine key overlaps with an sst key (i.e. engKey.Key == iterKey.Key), the sst key shadows the latest eng key or is a duplicate. If the sst violates this assumption, an error is now thrown, instead of silently returning inaccurate stats. Epic: none Release note: none
1 parent afef2d2 commit af813e0

File tree

2 files changed

+224
-48
lines changed

2 files changed

+224
-48
lines changed

pkg/storage/sst.go

Lines changed: 105 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -100,18 +100,21 @@ func errorIfReaderContainsRangeKeys(
100100
return nil
101101
}
102102

103+
var ComputeStatsDiffViolation = errors.New("ComputeStatsDiff assumptions violated")
104+
103105
// ComputeSSTStatsDiff computes a diff of the key span's mvcc stats if the sst
104106
// were applied. Note, the incoming sst must not contain any range keys. The key
105107
// span must be contained in the global keyspace.
106108
//
107-
// This function assumes that if an engine key overlaps with an sst key
108-
// (i.e. engKey.Key == iterKey.Key), the sst key shadows the latest eng key or
109-
// is a duplicate. Here are two valid examples:
109+
// This function can only compute accurate stats if an engine key overlaps with
110+
// an sst key (i.e. engKey.Key == iterKey.Key), the sst key shadows the latest
111+
// eng key or is a duplicate. If the sst violates this assumption, an error is
112+
// thrown. Here are two valid examples:
110113
//
111114
// 1. sst: a2, a1, eng: a4, a3, a2, a1
112115
// 2. sst: a4, a3, a2 eng: a2, a1
113116
//
114-
// The function cannot handle the following case: sst: a1, eng: a2
117+
// The function cannot handle the following case: sst: a1, eng: a2.
115118
//
116119
// Overall control flow:
117120
//
@@ -121,17 +124,16 @@ func errorIfReaderContainsRangeKeys(
121124
// engine that overlaps with the sst key.
122125
//
123126
// Detect duplicates in the sst: If engKey.Key == iterKey.Key and
124-
// engKey.Timestamp >= iterKey.Timestamp, advance both iterators to the next
125-
// roachpb key, assume the remaining versions of the key in the sst are
126-
// duplicates, and thus will not contribute to stats. Jump to top.
127+
// engKey.Timestamp >= iterKey.Timestamp, iterate through remaining mvcc
128+
// versions in the sst to ensure assumptions are valid. Jump to top.
127129
//
128130
// At this point, the current sstKey will contribute to stats: it either shadows
129131
// an eng key or no eng key overlaps with it.
130132
//
131133
// Call sstIter.Next()
132134
//
133135
// TODO(msbutler): Currently, this helper throws an error if the engine contains
134-
// range keys. support range keys in the engine.
136+
// range keys. Support range keys in the engine.
135137
func ComputeSSTStatsDiff(
136138
ctx context.Context, sst []byte, reader Reader, nowNanos int64, start, end MVCCKey,
137139
) (enginepb.MVCCStats, error) {
@@ -170,7 +172,6 @@ func ComputeSSTStatsDiff(
170172

171173
engIter, err := reader.NewMVCCIterator(ctx, MVCCKeyIterKind, IterOptions{
172174
KeyTypes: IterKeyTypePointsOnly,
173-
useL6Filters: true,
174175
ReadCategory: fs.BatchEvalReadCategory,
175176
UpperBound: end.Key,
176177
})
@@ -179,7 +180,9 @@ func ComputeSSTStatsDiff(
179180
}
180181
defer engIter.Close()
181182

182-
var engIterKey MVCCKey
183+
var (
184+
engIterKey, sstIterKey, prevSSTIterKey MVCCKey
185+
)
183186

184187
// setEngIterKey sets the engIterKey to the next key in the engine that is
185188
// greater than or equal to the passed in unversioned nextSSTKey. When
@@ -221,23 +224,100 @@ func ComputeSSTStatsDiff(
221224
return ms, errors.New("SST is empty")
222225
}
223226

224-
// processDuplicates advances the sst iterator to the next roachpb key, as all
225-
// remaining versions of this sst key should not contribute to stats.
227+
// processDuplicates returns an error if the incoming sst violates an
228+
// assumption required to compute accurate stats (see top level function
229+
// comment for details).
226230
processDuplicates := func() error {
227-
// TODO (msbutler): detect if the sst contains a version of the key not in
228-
// the engine, and if so, increment ContainsEstimates.
229-
sstIter.NextKey()
230-
return nil
231+
232+
checkEngIterValid := func() error {
233+
ok, err := engIter.Valid()
234+
if err != nil {
235+
return err
236+
}
237+
if !ok {
238+
// We've exhausted the eng iter after detecting sstKey == engKey, but
239+
// there there exists a valid sstIter key that is not already in the
240+
// engine.
241+
//
242+
// sst: a2
243+
// eng: a3
244+
return ComputeStatsDiffViolation
245+
246+
}
247+
return nil
248+
}
249+
250+
// First advance the engine iterator to the sstIterator mvcc key. Recall we
251+
// entered this function with the following properties: sstKey == engKey and
252+
// engKeyTimestamp is equal to or more recent than sstKeyTimestamp-- i.e.
253+
// a4, a3, or a2 below:
254+
//
255+
// sst: a2, a1
256+
// eng: a4, a3, a2, a1
257+
//
258+
// Pebble's SeekGE has significant overhead if advancing to the
259+
// desirable key requires only a couple Next() calls. Since we do not
260+
// expect the engine to have that many versions of the key, try to
261+
// advance the iterator with 5 next calls, then fall back to the more
262+
// expensive SeekGE.
263+
nextCount := 0
264+
for {
265+
if engIterKey.Compare(sstIterKey) >= 0 {
266+
break
267+
}
268+
if nextCount > 5 {
269+
engIter.SeekGE(sstIterKey)
270+
} else {
271+
engIter.Next()
272+
nextCount++
273+
}
274+
if err := checkEngIterValid(); err != nil {
275+
return err
276+
}
277+
engIterKey = engIter.UnsafeKey()
278+
}
279+
280+
for {
281+
// At the top of the loop, both iterators are valid, and should
282+
// be equal if shadowing assumptions are held.
283+
if sstIterKey.Compare(engIterKey) != 0 {
284+
// The current sstKey does not exist in the engine.
285+
return ComputeStatsDiffViolation
286+
}
287+
288+
// The current engine and sst keys match. Move to next mvcc verstion.
289+
if sstIterKey.Key.Compare(prevSSTIterKey.Key) != 0 {
290+
prevSSTIterKey.Key = append(prevSSTIterKey.Key[:0], sstIterKey.Key...)
291+
}
292+
prevSSTIterKey.Timestamp = sstIterKey.Timestamp
293+
sstIter.Next()
294+
295+
if ok, err := sstIter.Valid(); !ok || err != nil {
296+
return err
297+
}
298+
sstIterKey = sstIter.UnsafeKey()
299+
300+
if prevSSTIterKey.Key.Less(sstIterKey.Key) {
301+
// sstIterator now lives on the next key, so we have finished processing
302+
// duplicates.
303+
return nil
304+
}
305+
306+
engIter.Next()
307+
if err := checkEngIterValid(); err != nil {
308+
return err
309+
}
310+
engIterKey = engIter.UnsafeKey()
311+
}
231312
}
232313

233-
prevSSTKey := NilKey
234314
for {
235315
if ok, err := sstIter.Valid(); err != nil {
236316
return ms, err
237317
} else if !ok {
238318
break
239319
}
240-
sstIterKey := sstIter.UnsafeKey()
320+
sstIterKey = sstIter.UnsafeKey()
241321

242322
// To understand if this sst key overlaps with an eng key, advance the eng
243323
// iterator to the live key at or after the sst key.
@@ -254,8 +334,6 @@ func ComputeSSTStatsDiff(
254334
// sst: a2, a1
255335
// eng: a4, a3, a2, a1
256336
if sstKeySameAsEng && sstIterKey.Timestamp.LessEq(engIterKey.Timestamp) {
257-
prevSSTKey.Key = append(prevSSTKey.Key[:0], sstIterKey.Key...)
258-
prevSSTKey.Timestamp = sstIterKey.Timestamp
259337
if err := processDuplicates(); err != nil {
260338
return ms, err
261339
}
@@ -268,7 +346,7 @@ func ComputeSSTStatsDiff(
268346

269347
// isMetaKey indicates the current sstKey is the latest version of the key
270348
// in the sst.
271-
isMetaKey := prevSSTKey.Key.Compare(sstIterKey.Key) != 0
349+
isMetaKey := prevSSTIterKey.Key.Compare(sstIterKey.Key) != 0
272350

273351
sstVal, err := sstIter.UnsafeValue()
274352
if err != nil {
@@ -296,7 +374,7 @@ func ComputeSSTStatsDiff(
296374
// If the sst key is not live, it must contribute to GCBytesAge. If the
297375
// key is a tombstone it accrues GCBytesAge at its own timestamp, else at
298376
// the timestamp which it is shadowed.
299-
nonLiveTime := prevSSTKey.Timestamp.WallTime
377+
nonLiveTime := prevSSTIterKey.Timestamp.WallTime
300378
if sstValueIsTombstone {
301379
nonLiveTime = sstIterKey.Timestamp.WallTime
302380
}
@@ -350,13 +428,14 @@ func ComputeSSTStatsDiff(
350428
ms.LiveBytes -= engMetaKeySize + MVCCVersionTimestampSize + engValSize
351429
ms.GCBytesAge += gcBytes * (nowNanos/1e9 - sstIterKey.Timestamp.WallTime/1e9)
352430
} else {
353-
ms.GCBytesAge += gcBytes * (prevSSTKey.Timestamp.WallTime/1e9 - sstIterKey.Timestamp.WallTime/1e9)
431+
ms.GCBytesAge += gcBytes * (prevSSTIterKey.Timestamp.WallTime/1e9 - sstIterKey.Timestamp.WallTime/1e9)
354432
}
355433
}
356434
}
357-
358-
prevSSTKey.Key = append(prevSSTKey.Key[:0], sstIterKey.Key...)
359-
prevSSTKey.Timestamp = sstIterKey.Timestamp
435+
if isMetaKey {
436+
prevSSTIterKey.Key = append(prevSSTIterKey.Key[:0], sstIterKey.Key...)
437+
}
438+
prevSSTIterKey.Timestamp = sstIterKey.Timestamp
360439
sstIter.Next()
361440
}
362441
ms.LastUpdateNanos = nowNanos

pkg/storage/sst_stats_diff_test.go

Lines changed: 119 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,21 @@ import (
2525
"github.com/stretchr/testify/require"
2626
)
2727

28+
// pFixed is like p, but simply sets the value to the key. This is useful for
29+
// test cases that process duplicates between the eng and the sst, since we
30+
// cannot handle two identical roachpb keys + timestamps with different values.
31+
func pFixed(stringifiedKVs string) storageutils.KVs {
32+
kvs := storageutils.KVs{}
33+
for i := 0; i < len(stringifiedKVs); i += 2 {
34+
key := string(stringifiedKVs[i])
35+
ts := int64(stringifiedKVs[i+1]) * 1e9
36+
value := key
37+
kv := storageutils.PointKV(key, int(ts), value)
38+
kvs = append(kvs, kv)
39+
}
40+
return kvs
41+
}
42+
2843
func TestMVCCComputeSSTStatsDiff(t *testing.T) {
2944
defer leaktest.AfterTest(t)()
3045
defer log.Scope(t).Close(t)
@@ -63,25 +78,13 @@ func TestMVCCComputeSSTStatsDiff(t *testing.T) {
6378
return kvs
6479
}
6580

66-
// pfixed is like p, but simply sets the value to the key. This is useful for
67-
// test cases that process duplicates between the eng and the sst, since we
68-
// cannot handle two identical roachpb keys + timestamps with different values.
69-
pFixed := func(stringifiedKVs string) storageutils.KVs {
70-
kvs := storageutils.KVs{}
71-
for i := 0; i < len(stringifiedKVs); i += 2 {
72-
key := string(stringifiedKVs[i])
73-
ts := int64(stringifiedKVs[i+1])
74-
value := key
75-
kv := storageutils.PointKV(key, int(ts), value)
76-
kvs = append(kvs, kv)
77-
}
78-
return kvs
79-
}
80-
8181
testCases := []struct {
8282
name string
83-
sst storageutils.KVs
84-
eng storageutils.KVs
83+
// sst describes an sst that the test will run ComputeStatsDiff on
84+
sst storageutils.KVs
85+
86+
// eng describes the keys in the existing key space
87+
eng storageutils.KVs
8588
}{
8689
{
8790
name: "emptyKeyspace",
@@ -153,6 +156,26 @@ func TestMVCCComputeSSTStatsDiff(t *testing.T) {
153156
sst: p("a2a1c2"),
154157
eng: p("b1c1d2"),
155158
},
159+
{
160+
name: "dupeThenSSTAhead",
161+
sst: pFixed("a2c1"),
162+
eng: pFixed("a2b1"),
163+
},
164+
{
165+
name: "dupeThenSSTBehind",
166+
sst: pFixed("a2b1"),
167+
eng: pFixed("a2c1"),
168+
},
169+
{
170+
name: "doubleDupe",
171+
sst: pFixed("a2b1"),
172+
eng: pFixed("a2b1"),
173+
},
174+
{
175+
name: "processDupesFallBackToSeek",
176+
sst: pFixed("a1"),
177+
eng: pFixed("a7a6a5a4a3a2a1"),
178+
},
156179
}
157180

158181
for _, tc := range testCases {
@@ -169,11 +192,6 @@ func TestMVCCComputeSSTStatsDiff(t *testing.T) {
169192
eng, sst = addRandomDeletes(t, rng, eng, sst)
170193
}
171194

172-
if tc.name == "sstHistoryGreaterThanEng" {
173-
174-
t.Log("ok")
175-
}
176-
177195
local, _, _ := storageutils.MakeSST(t, st, eng)
178196
require.NoError(t, fs.WriteFile(engine.Env(), "local", local, fs.UnspecifiedWriteCategory))
179197
require.NoError(t, engine.IngestLocalFiles(ctx, []string{"local"}))
@@ -256,3 +274,82 @@ func addRandomDeletes(
256274

257275
return engKVs, sstKVs
258276
}
277+
278+
func TestMVCCComputeStatsDiffEstimates(t *testing.T) {
279+
defer leaktest.AfterTest(t)()
280+
defer log.Scope(t).Close(t)
281+
282+
ctx := context.Background()
283+
284+
st := cluster.MakeTestingClusterSettings()
285+
286+
engine := storage.NewDefaultInMemForTesting()
287+
defer engine.Close()
288+
289+
testCases := []struct {
290+
name string
291+
// sst describes an sst that the test will run ComputeStatsDiff on
292+
sst storageutils.KVs
293+
294+
// eng describes the keys in the existing key space
295+
eng storageutils.KVs
296+
}{
297+
{
298+
name: "extraVersions",
299+
sst: pFixed("a2a1"),
300+
eng: pFixed("a2"),
301+
},
302+
{
303+
name: "extraVersionsMoreData",
304+
sst: pFixed("a2a1"),
305+
eng: pFixed("a2b1"),
306+
},
307+
{
308+
name: "missingNewVersion",
309+
sst: pFixed("a1"),
310+
eng: pFixed("a2"),
311+
},
312+
{
313+
name: "missingNewVersionMoreData",
314+
sst: pFixed("a1"),
315+
eng: pFixed("a2b1"),
316+
},
317+
{
318+
name: "seekToExtraVersions",
319+
sst: pFixed("a2a1"),
320+
eng: pFixed("a4a3a2"),
321+
},
322+
{
323+
name: "hole",
324+
sst: pFixed("a2"),
325+
eng: pFixed("a3a1"),
326+
},
327+
}
328+
329+
for _, tc := range testCases {
330+
t.Run(tc.name, func(t *testing.T) {
331+
332+
// Clear the engine before each test, so stats collection for each test
333+
// is independent.
334+
require.NoError(t, engine.Excise(ctx, roachpb.Span{Key: keys.LocalMax, EndKey: roachpb.KeyMax}))
335+
336+
eng := tc.eng
337+
sst := tc.sst
338+
339+
local, _, _ := storageutils.MakeSST(t, st, eng)
340+
require.NoError(t, fs.WriteFile(engine.Env(), "local", local, fs.UnspecifiedWriteCategory))
341+
require.NoError(t, engine.IngestLocalFiles(ctx, []string{"local"}))
342+
343+
now := int64(timeutil.Now().Nanosecond())
344+
345+
sstEncoded, startUnversioned, endUnversioned := storageutils.MakeSST(t, st, sst)
346+
start := storage.MVCCKey{Key: startUnversioned}
347+
end := storage.MVCCKey{Key: endUnversioned}
348+
updateTime := now + 1
349+
350+
_, err := storage.ComputeSSTStatsDiff(
351+
ctx, sstEncoded, engine, updateTime, start, end)
352+
require.ErrorContains(t, err, storage.ComputeStatsDiffViolation.Error())
353+
})
354+
}
355+
}

0 commit comments

Comments
 (0)