@@ -100,18 +100,21 @@ func errorIfReaderContainsRangeKeys(
100
100
return nil
101
101
}
102
102
103
+ var ComputeStatsDiffViolation = errors .New ("ComputeStatsDiff assumptions violated" )
104
+
103
105
// ComputeSSTStatsDiff computes a diff of the key span's mvcc stats if the sst
104
106
// were applied. Note, the incoming sst must not contain any range keys. The key
105
107
// span must be contained in the global keyspace.
106
108
//
107
- // This function assumes that if an engine key overlaps with an sst key
108
- // (i.e. engKey.Key == iterKey.Key), the sst key shadows the latest eng key or
109
- // is a duplicate. Here are two valid examples:
109
+ // This function can only compute accurate stats if, whenever an engine key overlaps with
110
+ // an sst key (i.e. engKey.Key == iterKey.Key), the sst key shadows the latest
111
+ // eng key or is a duplicate. If the sst violates this assumption, an error is
112
+ // returned. Here are two valid examples:
110
113
//
111
114
// 1. sst: a2, a1, eng: a4, a3, a2, a1
112
115
// 2. sst: a4, a3, a2 eng: a2, a1
113
116
//
114
- // The function cannot handle the following case: sst: a1, eng: a2
117
+ // The function cannot handle the following case: sst: a1, eng: a2.
115
118
//
116
119
// Overall control flow:
117
120
//
@@ -121,17 +124,16 @@ func errorIfReaderContainsRangeKeys(
121
124
// engine that overlaps with the sst key.
122
125
//
123
126
// Detect duplicates in the sst: If engKey.Key == iterKey.Key and
124
- // engKey.Timestamp >= iterKey.Timestamp, advance both iterators to the next
125
- // roachpb key, assume the remaining versions of the key in the sst are
126
- // duplicates, and thus will not contribute to stats. Jump to top.
127
+ // engKey.Timestamp >= iterKey.Timestamp, iterate through remaining mvcc
128
+ // versions in the sst to ensure assumptions are valid. Jump to top.
127
129
//
128
130
// At this point, the current sstKey will contribute to stats: it either shadows
129
131
// an eng key or no eng key overlaps with it.
130
132
//
131
133
// Call sstIter.Next()
132
134
//
133
135
// TODO(msbutler): Currently, this helper throws an error if the engine contains
134
- // range keys. support range keys in the engine.
136
+ // range keys. Support range keys in the engine.
135
137
func ComputeSSTStatsDiff (
136
138
ctx context.Context , sst []byte , reader Reader , nowNanos int64 , start , end MVCCKey ,
137
139
) (enginepb.MVCCStats , error ) {
@@ -170,7 +172,6 @@ func ComputeSSTStatsDiff(
170
172
171
173
engIter , err := reader .NewMVCCIterator (ctx , MVCCKeyIterKind , IterOptions {
172
174
KeyTypes : IterKeyTypePointsOnly ,
173
- useL6Filters : true ,
174
175
ReadCategory : fs .BatchEvalReadCategory ,
175
176
UpperBound : end .Key ,
176
177
})
@@ -179,7 +180,9 @@ func ComputeSSTStatsDiff(
179
180
}
180
181
defer engIter .Close ()
181
182
182
- var engIterKey MVCCKey
183
+ var (
184
+ engIterKey , sstIterKey , prevSSTIterKey MVCCKey
185
+ )
183
186
184
187
// setEngIterKey sets the engIterKey to the next key in the engine that is
185
188
// greater than or equal to the passed in unversioned nextSSTKey. When
@@ -221,23 +224,100 @@ func ComputeSSTStatsDiff(
221
224
return ms , errors .New ("SST is empty" )
222
225
}
223
226
224
- // processDuplicates advances the sst iterator to the next roachpb key, as all
225
- // remaining versions of this sst key should not contribute to stats.
227
+ // processDuplicates returns an error if the incoming sst violates an
228
+ // assumption required to compute accurate stats (see top level function
229
+ // comment for details).
226
230
processDuplicates := func () error {
227
- // TODO (msbutler): detect if the sst contains a version of the key not in
228
- // the engine, and if so, increment ContainsEstimates.
229
- sstIter .NextKey ()
230
- return nil
231
+
232
+ checkEngIterValid := func () error {
233
+ ok , err := engIter .Valid ()
234
+ if err != nil {
235
+ return err
236
+ }
237
+ if ! ok {
238
+ // We've exhausted the eng iter after detecting sstKey == engKey, but
239
+ // there exists a valid sstIter key that is not already in the
240
+ // engine.
241
+ //
242
+ // sst: a2
243
+ // eng: a3
244
+ return ComputeStatsDiffViolation
245
+
246
+ }
247
+ return nil
248
+ }
249
+
250
+ // First advance the engine iterator to the sstIterator mvcc key. Recall we
251
+ // entered this function with the following properties: sstKey == engKey and
252
+ // engKeyTimestamp is equal to or more recent than sstKeyTimestamp-- i.e.
253
+ // a4, a3, or a2 below:
254
+ //
255
+ // sst: a2, a1
256
+ // eng: a4, a3, a2, a1
257
+ //
258
+ // Pebble's SeekGE has significant overhead if advancing to the
259
+ // desired key requires only a couple of Next() calls. Since we do not
260
+ // expect the engine to have that many versions of the key, try to
261
+ // advance the iterator with 5 next calls, then fall back to the more
262
+ // expensive SeekGE.
263
+ nextCount := 0
264
+ for {
265
+ if engIterKey .Compare (sstIterKey ) >= 0 {
266
+ break
267
+ }
268
+ if nextCount > 5 {
269
+ engIter .SeekGE (sstIterKey )
270
+ } else {
271
+ engIter .Next ()
272
+ nextCount ++
273
+ }
274
+ if err := checkEngIterValid (); err != nil {
275
+ return err
276
+ }
277
+ engIterKey = engIter .UnsafeKey ()
278
+ }
279
+
280
+ for {
281
+ // At the top of the loop, both iterators are valid, and should
282
+ // be equal if shadowing assumptions are held.
283
+ if sstIterKey .Compare (engIterKey ) != 0 {
284
+ // The current sstKey does not exist in the engine.
285
+ return ComputeStatsDiffViolation
286
+ }
287
+
288
+ // The current engine and sst keys match. Move to next mvcc version.
289
+ if sstIterKey .Key .Compare (prevSSTIterKey .Key ) != 0 {
290
+ prevSSTIterKey .Key = append (prevSSTIterKey .Key [:0 ], sstIterKey .Key ... )
291
+ }
292
+ prevSSTIterKey .Timestamp = sstIterKey .Timestamp
293
+ sstIter .Next ()
294
+
295
+ if ok , err := sstIter .Valid (); ! ok || err != nil {
296
+ return err
297
+ }
298
+ sstIterKey = sstIter .UnsafeKey ()
299
+
300
+ if prevSSTIterKey .Key .Less (sstIterKey .Key ) {
301
+ // sstIterator now lives on the next key, so we have finished processing
302
+ // duplicates.
303
+ return nil
304
+ }
305
+
306
+ engIter .Next ()
307
+ if err := checkEngIterValid (); err != nil {
308
+ return err
309
+ }
310
+ engIterKey = engIter .UnsafeKey ()
311
+ }
231
312
}
232
313
233
- prevSSTKey := NilKey
234
314
for {
235
315
if ok , err := sstIter .Valid (); err != nil {
236
316
return ms , err
237
317
} else if ! ok {
238
318
break
239
319
}
240
- sstIterKey : = sstIter .UnsafeKey ()
320
+ sstIterKey = sstIter .UnsafeKey ()
241
321
242
322
// To understand if this sst key overlaps with an eng key, advance the eng
243
323
// iterator to the live key at or after the sst key.
@@ -254,8 +334,6 @@ func ComputeSSTStatsDiff(
254
334
// sst: a2, a1
255
335
// eng: a4, a3, a2, a1
256
336
if sstKeySameAsEng && sstIterKey .Timestamp .LessEq (engIterKey .Timestamp ) {
257
- prevSSTKey .Key = append (prevSSTKey .Key [:0 ], sstIterKey .Key ... )
258
- prevSSTKey .Timestamp = sstIterKey .Timestamp
259
337
if err := processDuplicates (); err != nil {
260
338
return ms , err
261
339
}
@@ -268,7 +346,7 @@ func ComputeSSTStatsDiff(
268
346
269
347
// isMetaKey indicates the current sstKey is the latest version of the key
270
348
// in the sst.
271
- isMetaKey := prevSSTKey .Key .Compare (sstIterKey .Key ) != 0
349
+ isMetaKey := prevSSTIterKey .Key .Compare (sstIterKey .Key ) != 0
272
350
273
351
sstVal , err := sstIter .UnsafeValue ()
274
352
if err != nil {
@@ -296,7 +374,7 @@ func ComputeSSTStatsDiff(
296
374
// If the sst key is not live, it must contribute to GCBytesAge. If the
297
375
// key is a tombstone it accrues GCBytesAge at its own timestamp, else at
298
376
// the timestamp which it is shadowed.
299
- nonLiveTime := prevSSTKey .Timestamp .WallTime
377
+ nonLiveTime := prevSSTIterKey .Timestamp .WallTime
300
378
if sstValueIsTombstone {
301
379
nonLiveTime = sstIterKey .Timestamp .WallTime
302
380
}
@@ -350,13 +428,14 @@ func ComputeSSTStatsDiff(
350
428
ms .LiveBytes -= engMetaKeySize + MVCCVersionTimestampSize + engValSize
351
429
ms .GCBytesAge += gcBytes * (nowNanos / 1e9 - sstIterKey .Timestamp .WallTime / 1e9 )
352
430
} else {
353
- ms .GCBytesAge += gcBytes * (prevSSTKey .Timestamp .WallTime / 1e9 - sstIterKey .Timestamp .WallTime / 1e9 )
431
+ ms .GCBytesAge += gcBytes * (prevSSTIterKey .Timestamp .WallTime / 1e9 - sstIterKey .Timestamp .WallTime / 1e9 )
354
432
}
355
433
}
356
434
}
357
-
358
- prevSSTKey .Key = append (prevSSTKey .Key [:0 ], sstIterKey .Key ... )
359
- prevSSTKey .Timestamp = sstIterKey .Timestamp
435
+ if isMetaKey {
436
+ prevSSTIterKey .Key = append (prevSSTIterKey .Key [:0 ], sstIterKey .Key ... )
437
+ }
438
+ prevSSTIterKey .Timestamp = sstIterKey .Timestamp
360
439
sstIter .Next ()
361
440
}
362
441
ms .LastUpdateNanos = nowNanos
0 commit comments