Skip to content

Commit eef8437

Browse files
committed
storage: introduce ComputeSSTStatsDiff
This patch adds the new ComputeSSTStatsDiff utility which will be called in batcheval.EvalAddSSTable in a future PR to ingest an sst over a non-empty key space with an accurate stats update. Informs #145548 Release note: none
1 parent 9963ba7 commit eef8437

File tree

3 files changed

+540
-0
lines changed

3 files changed

+540
-0
lines changed

pkg/storage/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ go_test(
148148
"pebble_mvcc_scanner_test.go",
149149
"pebble_test.go",
150150
"read_as_of_iterator_test.go",
151+
"sst_stats_diff_test.go",
151152
"sst_test.go",
152153
"sst_writer_test.go",
153154
"store_properties_test.go",

pkg/storage/sst.go

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,290 @@ func NewMultiMemSSTIterator(ssts [][]byte, verify bool, opts IterOptions) (MVCCI
7575
return iter, nil
7676
}
7777

78+
// ComputeSSTStatsDiffReaderHasRangeKeys is returned if the reader passed to
79+
// ComputeSSTStatsDiff contains range keys in the span [start, end).
80+
var ComputeSSTStatsDiffReaderHasRangeKeys = errors.New("engine contains rangekeys")
81+
82+
func errorIfReaderContainsRangeKeys(
83+
ctx context.Context, reader Reader, start, UpperBound roachpb.Key,
84+
) error {
85+
engRKIter, err := reader.NewMVCCIterator(ctx, MVCCKeyIterKind, IterOptions{
86+
KeyTypes: IterKeyTypeRangesOnly,
87+
UpperBound: UpperBound,
88+
})
89+
if err != nil {
90+
return err
91+
}
92+
defer engRKIter.Close()
93+
engRKIter.SeekGE(MVCCKey{Key: start})
94+
if ok, err := engRKIter.Valid(); err != nil {
95+
return err
96+
} else if ok {
97+
return ComputeSSTStatsDiffReaderHasRangeKeys
98+
}
99+
return nil
100+
}
101+
102+
// ComputeSSTStatsDiff computes a diff of the key span's mvcc stats if the sst
103+
// were applied. Note, the incoming sst must not contain any range keys. The key
104+
// span must be contained in the global keyspace.
105+
//
106+
// This function assumes that if an engine key overlaps with an sst key
107+
// (i.e. engKey.Key == sstKey.Key), the sst key shadows the latest eng key or
108+
// is a duplicate. Here are two valid examples:
109+
//
110+
// 1. sst: a2, a1, eng: a4, a3, a2, a1
111+
// 2. sst: a4, a3, a2, eng: a2, a1
112+
//
113+
// The function cannot handle the following case: sst: a1, eng: a2
114+
//
115+
// Overall control flow:
116+
//
117+
// In an iteration:
118+
//
119+
// Ensure engKey.Key is geq sstKey.Key, to detect if there exists a key in the
120+
// engine that overlaps with the sst key.
121+
//
122+
// Detect duplicates in the sst: If engKey.Key == sstKey.Key and
123+
// engKey.Timestamp >= sstKey.Timestamp, advance the sst iterator to the next
124+
// roachpb key (the eng iterator is re-seeked on a later iteration if needed),
125+
// assume the remaining versions of the key in the sst are duplicates, and thus
// will not contribute to stats. Jump to top.
126+
//
127+
// At this point, the current sstKey will contribute to stats: it either shadows
128+
// an eng key or no eng key overlaps with it.
129+
//
130+
// Call sstIter.Next()
//
// nowNanos (presumably the current wall time in nanoseconds) is divided by 1e9
// and used as "now" when computing GCBytesAge. The engine iterators are seeked
// to start.Key and bounded above by end.Key; the sst point iterator is seeked
// to start and bounded above by end.Key.
//
// An error is returned if start.Key sorts before keys.LocalMax, if the sst
// contains any range key, if the sst point iterator is not valid after seeking
// to start, or if the reader contains a range key at or after start.Key and
// below end.Key (in which case the error is
// ComputeSSTStatsDiffReaderHasRangeKeys).
131+
//
132+
// TODO(msbutler): Currently, this helper throws an error if the engine contains
133+
// range keys. Support range keys in the engine.
134+
func ComputeSSTStatsDiff(
135+
ctx context.Context, sst []byte, reader Reader, nowNanos int64, start, end MVCCKey,
136+
) (enginepb.MVCCStats, error) {
137+
138+
var ms enginepb.MVCCStats
139+
140+
// Ensure we're not iterating over local keys.
141+
if start.Key.Compare(keys.LocalMax) < 0 {
142+
return ms, errors.New("start key must be greater than LocalMax")
143+
}
144+
145+
// Ensure there are no range keys in the SST.
146+
rkIter, err := NewMemSSTIterator(sst, false /* verify */, IterOptions{
147+
KeyTypes: IterKeyTypeRangesOnly,
148+
UpperBound: keys.MaxKey,
149+
})
150+
if err != nil {
151+
return ms, err
152+
}
153+
defer rkIter.Close()
154+
rkIter.SeekGE(NilKey)
155+
if ok, err := rkIter.Valid(); err != nil {
156+
return ms, err
157+
} else if ok {
158+
return ms, errors.New("SST contains range keys")
159+
}
160+
161+
// Ensure the engine has no range keys.
162+
//
163+
// TODO(msbutler): remove this check once we support range keys in the engine.
164+
if err := errorIfReaderContainsRangeKeys(ctx, reader, start.Key, end.Key); err != nil {
165+
return ms, err
166+
}
167+
168+
engIter, err := reader.NewMVCCIterator(ctx, MVCCKeyIterKind, IterOptions{
169+
KeyTypes: IterKeyTypePointsOnly,
170+
useL6Filters: true,
171+
ReadCategory: fs.BatchEvalReadCategory,
172+
UpperBound: end.Key,
173+
})
174+
if err != nil {
175+
return ms, err
176+
}
177+
defer engIter.Close()
178+
179+
var engIterKey MVCCKey
180+
181+
// setEngIterKey sets the engIterKey to the next key in the engine that is
182+
// greater than or equal to the passed in unversioned sst key (nexSSTKey). When
183+
// computing the stats impact of an incoming sst key, we need to understand if
184+
// it overlaps existing keys. If such a key in the engine exists, this helper
185+
// updates engIterKey to the latest version of that key (which may be a tombstone).
186+
setEngIterKey := func(nexSSTKey roachpb.Key) error {
187+
engIter.SeekGE(MVCCKey{Key: nexSSTKey})
188+
if ok, err := engIter.Valid(); err != nil {
189+
return err
190+
} else if !ok {
191+
// When the eng iterator is exhausted, the remaining sst keys are being
192+
// ingested into empty key space. To ensure the key comparison below never
193+
// treats the sst keys as shadowing eng keys, set the eng key to max.
194+
engIterKey = MVCCKeyMax
195+
} else {
196+
engIterKey = engIter.UnsafeKey()
197+
}
198+
return nil
199+
}
200+
201+
if err := setEngIterKey(start.Key); err != nil {
202+
return ms, err
203+
}
204+
205+
sstIter, err := NewMemSSTIterator(sst, false, IterOptions{
206+
KeyTypes: IterKeyTypePointsOnly,
207+
UpperBound: end.Key,
208+
})
209+
if err != nil {
210+
return ms, err
211+
}
212+
defer sstIter.Close()
213+
214+
sstIter.SeekGE(start)
215+
if ok, err := sstIter.Valid(); err != nil {
216+
return ms, err
217+
} else if !ok {
218+
return ms, errors.New("SST is empty")
219+
}
220+
221+
// processDuplicates advances the sst iterator to the next roachpb key, as all
222+
// remaining versions of this sst key should not contribute to stats.
223+
processDuplicates := func() error {
224+
// TODO (msbutler): detect if the sst contains a version of the key not in
225+
// the engine, and if so, increment ContainsEstimates.
226+
sstIter.NextKey()
227+
return nil
228+
}
229+
230+
prevSSTKey := NilKey
231+
for {
232+
if ok, err := sstIter.Valid(); err != nil {
233+
return ms, err
234+
} else if !ok {
235+
break
236+
}
237+
sstIterKey := sstIter.UnsafeKey()
238+
239+
// To understand if this sst key overlaps with an eng key, advance the eng
240+
// iterator to the key at or after the sst key.
241+
if sstIterKey.Key.Compare(engIterKey.Key) > 0 {
242+
if err := setEngIterKey(sstIterKey.Key); err != nil {
243+
return ms, err
244+
}
245+
}
246+
sstKeySameAsEng := engIterKey.Key.Compare(sstIterKey.Key) == 0
247+
248+
// If engKey shadows sstKey, all remaining versions of the sst key will not
249+
// contribute to stats. Advance the sstIter to the next key. As an example:
250+
//
251+
// sst: a2, a1
252+
// eng: a4, a3, a2, a1
253+
if sstKeySameAsEng && sstIterKey.Timestamp.LessEq(engIterKey.Timestamp) {
254+
prevSSTKey.Key = append(prevSSTKey.Key[:0], sstIterKey.Key...)
255+
prevSSTKey.Timestamp = sstIterKey.Timestamp
256+
if err := processDuplicates(); err != nil {
257+
return ms, err
258+
}
259+
continue
260+
}
261+
262+
// At this point, the sst key shadows the eng key or is landing in empty key
263+
// space, so it must contribute to stats.
264+
sstKeyShadowsEng := sstKeySameAsEng && engIterKey.Timestamp.Less(sstIterKey.Timestamp)
265+
266+
// isMetaKey indicates the current sstKey is the latest version of the key
267+
// in the sst.
268+
isMetaKey := prevSSTKey.Key.Compare(sstIterKey.Key) != 0
269+
270+
sstVal, err := sstIter.UnsafeValue()
271+
if err != nil {
272+
return ms, err
273+
}
274+
sstValueIsTombstone, err := EncodedMVCCValueIsTombstone(sstVal)
275+
if err != nil {
276+
return ms, err
277+
}
278+
279+
sstKeyIsLive := !sstValueIsTombstone && isMetaKey
280+
281+
var metaKeySize int64
282+
if isMetaKey {
283+
metaKeySize = int64(mvccencoding.EncodedMVCCKeyPrefixLength(sstIterKey.Key))
284+
ms.KeyCount++
285+
}
286+
valSize := int64(len(sstVal))
287+
totalSize := metaKeySize + MVCCVersionTimestampSize + valSize
288+
289+
if sstKeyIsLive {
290+
ms.LiveCount++
291+
ms.LiveBytes += totalSize
292+
} else {
293+
// If the sst key is not live, it must contribute to GCBytesAge. If the
294+
// key is a tombstone it accrues GCBytesAge at its own timestamp, else at
295+
// the timestamp at which it is shadowed (that of the previous sst key).
296+
nonLiveTime := prevSSTKey.Timestamp.WallTime
297+
if sstValueIsTombstone {
298+
nonLiveTime = sstIterKey.Timestamp.WallTime
299+
}
300+
ms.GCBytesAge += totalSize * (nowNanos/1e9 - nonLiveTime/1e9)
301+
}
302+
303+
ms.KeyBytes += metaKeySize + MVCCVersionTimestampSize
304+
ms.ValBytes += valSize
305+
ms.ValCount++
306+
307+
// Next, subtract off stats if the sst key shadows the eng meta key.
308+
if sstKeyShadowsEng {
309+
engValue, err := engIter.UnsafeValue()
310+
if err != nil {
311+
return ms, err
312+
}
313+
engMetaKeySize := int64(mvccencoding.EncodedMVCCKeyPrefixLength(engIterKey.Key))
314+
engValSize := int64(len(engValue))
315+
316+
engValueIsTombstone, err := EncodedMVCCValueIsTombstone(engValue)
317+
if err != nil {
318+
return ms, err
319+
}
320+
// Except for GCBytesAge on a non tombstone engine key, only decrement
321+
// stats once for each metakey which shadows the eng key.
322+
if isMetaKey {
323+
ms.KeyCount--
324+
ms.KeyBytes -= engMetaKeySize
325+
326+
if engValueIsTombstone {
327+
// If the sst key shadows a tombstone in the engine, we need to remove
328+
// the tombstone's metakey contribution to GCBytesAge, as it is no
329+
// longer a metakey.
330+
ms.GCBytesAge -= engMetaKeySize * (nowNanos/1e9 - engIterKey.Timestamp.WallTime/1e9)
331+
}
332+
}
333+
334+
if !engValueIsTombstone {
335+
// For GCBytesAge when the sst shadows a non tombstone: ideally the
336+
// timestamp diff would be (now - earliestShadowingSSTKey.Timestamp).
337+
// In other words, if we had sst: a5,a4,a3 and eng a2, the time diff
338+
// would be (n - 3); however it's hard to know the earliest version
339+
// of key a in the sst that is greater than a2. Instead, we can do a
340+
// little math:
341+
//
342+
// (n-3) == (n-5) + ((5-4) + (4 - 3))
343+
// n-3 == n - 5 + (1 + 1) = n-3
344+
gcBytes := MVCCVersionTimestampSize + engValSize
345+
if isMetaKey {
346+
ms.LiveCount--
347+
ms.LiveBytes -= engMetaKeySize + MVCCVersionTimestampSize + engValSize
348+
ms.GCBytesAge += gcBytes * (nowNanos/1e9 - sstIterKey.Timestamp.WallTime/1e9)
349+
} else {
350+
ms.GCBytesAge += gcBytes * (prevSSTKey.Timestamp.WallTime/1e9 - sstIterKey.Timestamp.WallTime/1e9)
351+
}
352+
}
353+
}
354+
355+
prevSSTKey.Key = append(prevSSTKey.Key[:0], sstIterKey.Key...)
356+
prevSSTKey.Timestamp = sstIterKey.Timestamp
357+
sstIter.Next()
358+
}
359+
return ms, nil
360+
}
361+
78362
// CheckSSTConflicts iterates over an SST and a Reader in lockstep and errors
79363
// out if it finds any conflicts. This includes intents and existing keys with a
80364
// timestamp at or above the SST key timestamp.

0 commit comments

Comments
 (0)