Skip to content

Commit 3cc3a22

Browse files
craig[bot]msbutlerrafiss
committed
147919: batcheval: add ComputeStatsDiff arg to addsstable r=steven a=msbutler ComputeStatsDiff causes the server to compute the effect this sst will have on the range's mvcc stats, even in the presence of overlapping keys. This flag cannot be passed with the MVCCStats, DisallowShadowingBelow, or DisallowShadowing fields. This flag assumes that any key in the sst that is shadowed by a key in the engine is also a duplicate. As an example, accurate stats will be computed here: - sst: a@3,a@2 and eng: a@2,a@1 but not for, as a@1 is not a duplicate: - sst: a@1 and eng: a@2 Informs #145548 Release note: none 148695: roachtest: mark psycopg test as flaky r=rafiss a=rafiss fixes #148343 fixes #146895 Release note: None Co-authored-by: Michael Butler <[email protected]> Co-authored-by: Rafi Shamim <[email protected]>
3 parents 99e41b7 + 1bdd852 + 1d569ed commit 3cc3a22

File tree

5 files changed

+304
-199
lines changed

5 files changed

+304
-199
lines changed

pkg/cmd/roachtest/tests/psycopg_blocklist.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ var psycopgIgnoreList = blocklist{
2424
`tests.pool.test_pool.test_reconnect_after_grow_failed`: "requires insecure mode",
2525
`tests.pool.test_pool.test_reconnect_failure[False]`: "requires insecure mode",
2626
`tests.pool.test_pool.test_refill_on_check`: "requires insecure mode",
27+
`tests.pool.test_pool.test_shrink`: "flaky; see #146895",
2728
`tests.pool.test_pool.test_stats_connect`: "requires insecure mode",
2829
`tests.pool.test_pool_async.test_connect_check_timeout[asyncio]`: "requires insecure mode",
2930
`tests.pool.test_pool_async.test_reconnect[asyncio]`: "requires insecure mode",

pkg/kv/kvpb/api.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2574,3 +2574,17 @@ func validateExclusionTimestampForBatch(ts hlc.Timestamp, h Header) error {
25742574
}
25752575
return nil
25762576
}
2577+
2578+
func (r *AddSSTableRequest) Validate(bh Header) error {
2579+
if r.ComputeStatsDiff {
2580+
if r.DisallowConflicts || r.DisallowShadowingBelow.IsSet() {
2581+
return errors.New(
2582+
"invalid AddSSTableRequest: ComputeStatsDiff cannot be used with DisallowConflicts or DisallowShadowingBelow")
2583+
}
2584+
if r.MVCCStats != nil {
2585+
return errors.New(
2586+
"invalid AddSSTableRequest: ComputeStatsDiff cannot be used with precomputed MVCCStats")
2587+
}
2588+
}
2589+
return nil
2590+
}

pkg/kv/kvpb/api.proto

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2127,6 +2127,26 @@ message AddSSTableRequest {
21272127
//
21282128
// TODO(dt,msbutler,bilal): This is unsupported.
21292129
util.hlc.Timestamp ignore_keys_above_timestamp = 12 [(gogoproto.nullable) = false];
2130+
2131+
// ComputeStatsDiff causes the server to compute the effect this
2132+
// SST will have on the range's MVCC stats, even in the presence of
2133+
// overlapping keys. This flag cannot be passed with the MVCCStats,
2134+
// DisallowShadowingBelow, or DisallowShadowing fields.
2135+
//
2136+
// This flag assumes that any key in the sst that is shadowed by a key in the
2137+
// engine is also a duplicate. As an example, accurate stats will be computed
2138+
// here:
2139+
// - sst: a@3,a@2 and eng: a@2,a@1
2140+
//
2141+
// but not for, as a@1 is not a duplicate:
2142+
// - sst: a@1 and eng: a@2
2143+
//
2144+
// In the ladder case, we silently create inaccurate stats currently. A TODO
2145+
// in storage.ComputeSSTStatsDiff is to detect this case and increment
2146+
// ContainsEstimates. At least for PCR, the first client of this flag, we
2147+
// expect the edge case to be quite rare, and would only occur if we're
2148+
// ingesting data older than the ingesting range's GC TTL.
2149+
bool compute_stats_diff = 13 [(gogoproto.customname) = "ComputeStatsDiff"];
21302150

21312151
reserved 10, 11;
21322152
}

pkg/kv/kvserver/batcheval/cmd_add_sstable.go

Lines changed: 148 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,11 @@ func EvalAddSSTable(
130130
) (result.Result, error) {
131131
args := cArgs.Args.(*kvpb.AddSSTableRequest)
132132
h := cArgs.Header
133+
134+
if err := args.Validate(h); err != nil {
135+
return result.Result{}, err
136+
}
137+
133138
ms := cArgs.Stats
134139
start, end := storage.MVCCKey{Key: args.Key}, storage.MVCCKey{Key: args.EndKey}
135140
sst := args.Data
@@ -172,6 +177,10 @@ func EvalAddSSTable(
172177
}
173178
}
174179

180+
if err := checkSSTSpanBounds(ctx, sst, start, end); err != nil {
181+
return result.Result{}, err
182+
}
183+
175184
// If requested and necessary, rewrite the SST's MVCC timestamps to the
176185
// request timestamp. This ensures the writes comply with the timestamp cache
177186
// and closed timestamp, i.e. by not writing to timestamps that have already
@@ -194,10 +203,16 @@ func EvalAddSSTable(
194203
}
195204
}
196205

197-
var statsDelta enginepb.MVCCStats
198206
maxLockConflicts := storage.MaxConflictsPerLockConflictError.Get(&cArgs.EvalCtx.ClusterSettings().SV)
199207
targetLockConflictBytes := storage.TargetBytesPerLockConflictError.Get(&cArgs.EvalCtx.ClusterSettings().SV)
200208
checkConflicts := args.DisallowConflicts || !args.DisallowShadowingBelow.IsEmpty()
209+
210+
// checkConflictsStatsDelta is a delta between the SST-only statistics and
211+
// their effect when applied. In other words:
212+
//
213+
// sstOnlyStats + checkConflictsStatsDelta equals the actual stats
214+
// contribution of the sst.
215+
var checkConflictsStatsDelta enginepb.MVCCStats
201216
if checkConflicts {
202217
// If requested, check for MVCC conflicts with existing keys. This enforces
203218
// all MVCC invariants by returning WriteTooOldError for any existing
@@ -207,10 +222,10 @@ func EvalAddSSTable(
207222
// Additionally, if DisallowShadowingBelow is set, it will not write
208223
// above existing/visible values (but will write above tombstones).
209224
//
210-
// If the overlap between the ingested SST and the engine is large (i.e.
211-
// the SST is wide in keyspace), or if the ingested SST is very small,
212-
// use prefix seeks in CheckSSTConflicts. This ends up being more performant
213-
// as it avoids expensive seeks with index/data block loading in the common
225+
// If the overlap between the ingested SST and the engine is large (i.e. the
226+
// SST is wide in keyspace), or if the ingested SST is very small, use
227+
// prefix seeks in CheckSSTConflicts. This ends up being more performant as
228+
// it avoids expensive seeks with index/data block loading in the common
214229
// case of no conflicts.
215230
usePrefixSeek := false
216231
bytes, err := cArgs.EvalCtx.GetApproximateDiskBytes(start.Key, end.Key)
@@ -229,9 +244,9 @@ func EvalAddSSTable(
229244
args.Key, args.EndKey, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey())
230245

231246
log.VEventf(ctx, 2, "checking conflicts for SSTable [%s,%s)", start.Key, end.Key)
232-
statsDelta, err = storage.CheckSSTConflicts(ctx, sst, readWriter, start, end, leftPeekBound, rightPeekBound,
247+
checkConflictsStatsDelta, err = storage.CheckSSTConflicts(ctx, sst, readWriter, start, end, leftPeekBound, rightPeekBound,
233248
args.DisallowShadowingBelow, sstTimestamp, maxLockConflicts, targetLockConflictBytes, usePrefixSeek)
234-
statsDelta.Add(sstReqStatsDelta)
249+
checkConflictsStatsDelta.Add(sstReqStatsDelta)
235250
if err != nil {
236251
return result.Result{}, errors.Wrap(err, "checking for key collisions")
237252
}
@@ -250,112 +265,57 @@ func EvalAddSSTable(
250265
}
251266
}
252267

253-
// Verify that the keys in the sstable are within the range specified by the
254-
// request header, and if the request did not include pre-computed stats,
255-
// compute the expected MVCC stats delta of ingesting the SST.
256-
sstIter, err := storage.NewMemSSTIterator(sst, true /* verify */, storage.IterOptions{
257-
KeyTypes: storage.IterKeyTypePointsAndRanges,
258-
LowerBound: keys.MinKey,
259-
UpperBound: keys.MaxKey,
260-
})
261-
if err != nil {
262-
return result.Result{}, err
263-
}
264-
defer sstIter.Close()
265-
266-
// Check that the first key is in the expected range.
267-
sstIter.SeekGE(storage.MVCCKey{Key: keys.MinKey})
268-
if ok, err := sstIter.Valid(); err != nil {
269-
return result.Result{}, err
270-
} else if ok {
271-
if unsafeKey := sstIter.UnsafeKey(); unsafeKey.Less(start) {
272-
return result.Result{}, errors.Errorf("first key %s not in request range [%s,%s)",
273-
unsafeKey.Key, start.Key, end.Key)
274-
}
275-
}
276-
277-
// Get the MVCCStats for the SST being ingested.
278-
var stats enginepb.MVCCStats
279-
if args.MVCCStats != nil {
280-
stats = *args.MVCCStats
281-
} else {
282-
log.VEventf(ctx, 2, "computing MVCCStats for SSTable [%s,%s)", start.Key, end.Key)
283-
stats, err = storage.ComputeStatsForIter(sstIter, h.Timestamp.WallTime)
284-
if err != nil {
285-
return result.Result{}, errors.Wrap(err, "computing SSTable MVCC stats")
286-
}
287-
}
288-
289-
sstIter.SeekGE(end)
290-
if ok, err := sstIter.Valid(); err != nil {
291-
return result.Result{}, err
292-
} else if ok {
293-
return result.Result{}, errors.Errorf("last key %s not in request range [%s,%s)",
294-
sstIter.UnsafeKey(), start.Key, end.Key)
295-
}
296-
297-
// The above MVCCStats represents what is in this new SST.
298-
//
299-
// *If* the keys in the SST do not conflict with keys currently in this range,
300-
// then adding the stats for this SST to the range stats should yield the
301-
// correct overall stats.
302-
//
303-
// *However*, if the keys in this range *do* overlap with keys already in this
304-
// range, then adding the SST semantically *replaces*, rather than adds, those
305-
// keys, and the net effect on the stats is not so simple.
306-
//
307-
// To perfectly compute the correct net stats, you could a) determine the
308-
// stats for the span of the existing range that this SST covers and subtract
309-
// it from the range's stats, then b) use a merging iterator that reads from
310-
// the SST and then underlying range and compute the stats of that merged
311-
// span, and then add those stats back in. That would result in correct stats
312-
// that reflect the merging semantics when the SST "shadows" an existing key.
313-
//
314-
// If the underlying range is mostly empty, this isn't terribly expensive --
315-
// computing the existing stats to subtract is cheap, as there is little or no
316-
// existing data to traverse and b) is also pretty cheap -- the merging
317-
// iterator can quickly iterate the in-memory SST.
268+
// stats will represent the contribution of MVCC stats from the SST to the
269+
// range. We assume these stats are not estimates if we:
318270
//
319-
// However, if the underlying range is _not_ empty, then this is not cheap:
320-
// recomputing its stats involves traversing lots of data, and iterating the
321-
// merged iterator has to constantly go back and forth to the iterator.
271+
// 1. Call ComputeSSTStatsDiff below, which computes the exact stats diff of
272+
// the sst, even in the presence of duplicate, shadowing, and shadowed keys in
273+
// the ingesting keyspace. Note that this computation will throw an error if
274+
// there is a range key in the _incoming sst_.
322275
//
323-
// If we assume that most SSTs don't shadow too many keys, then the error of
324-
// simply adding the SST stats directly to the range stats is minimal. In the
325-
// worst-case, when we retry a whole SST, then it could be overcounting the
326-
// entire file, but we can hope that that is rare. In the worst case, it may
327-
// cause splitting an under-filled range that would later merge when the
328-
// over-count is fixed.
276+
// TODO(msbutler): In the first iteration, mvcc stats estimates will be
277+
// returned if the _underlying_ range contains range keys.
329278
//
330-
// We can indicate that these stats contain this estimation using the flag in
331-
// the MVCC stats so that later re-computations will not be surprised to find
332-
// any discrepancies.
279+
// 2. Call for CheckForSSTConflicts above, which asserts the sst does not
280+
// conflict with any underlying keys, as defined by DisallowShadowing and
281+
// DisallowShadowingBelow. Note that the checkConflictsStatsDelta is a delta
282+
// between the SST-only statistics and their effect when applied, which when
283+
// added to the SST statistics, will adjust them for existing keys and values.
284+
// Thus, we still need to compute the sst-only statistics on this branch. Also
285+
// note that CheckForSSTConflicts can still return estimates in certain corner
286+
// cases.
333287
//
334-
// Callers can trigger such a re-computation to fixup any discrepancies (and
335-
// remove the ContainsEstimates flag) after they are done ingesting files by
336-
// sending an explicit recompute.
337-
//
338-
// There is a significant performance win to be achieved by ensuring that the
339-
// stats computed are not estimates as it prevents recomputation on splits.
340-
// Running AddSSTable with disallowShadowing=true gets us close to this as we
341-
// do not allow colliding keys to be ingested. However, in the situation that
342-
// two SSTs have KV(s) which "perfectly" shadow an existing key (equal ts and
343-
// value), we do not consider this a collision. While the KV would just
344-
// overwrite the existing data, the stats would be added to the cumulative
345-
// stats of the AddSSTable command, causing a double count for such KVs.
346-
// Therefore, we compute the stats for these "skipped" KVs on-the-fly while
347-
// checking for the collision condition in C++ and subtract them from the
348-
// stats of the SST being ingested before adding them to the running
349-
// cumulative for this command. These stats can then be marked as accurate.
288+
// While computing stats in this request requires a potentially expensive scan
289+
// of the range, it ensures a healthy allocator that makes good decisions on
290+
// when to split/merge or gc a range (etc). Stats estimates may not be harmful
291+
// if we assume that most SSTs don't shadow too many keys, so the error of
292+
// simply adding the SST stats directly to the range stats is minimal.
293+
var stats enginepb.MVCCStats
294+
log.VEventf(ctx, 2, "computing MVCCStats for SSTable [%s,%s)", start.Key, end.Key)
350295
if checkConflicts {
351-
stats.Add(statsDelta)
352-
if statsDelta.ContainsEstimates == 0 {
353-
stats.ContainsEstimates = 0
296+
stats.Add(checkConflictsStatsDelta)
297+
}
298+
if args.ComputeStatsDiff {
299+
statsDiff, err := computeSSTStatsDiffWithFallback(ctx, sst, readWriter, h.Timestamp.WallTime, start, end)
300+
if err != nil {
301+
return result.Result{}, errors.Wrap(err, "computing SST stats diff")
302+
}
303+
stats.Add(statsDiff)
304+
} else if args.MVCCStats != nil {
305+
stats.Add(*args.MVCCStats)
306+
if !checkConflicts {
307+
stats.ContainsEstimates++
354308
}
355309
} else {
356-
stats.ContainsEstimates++
310+
sstStats, err := computeSSTStats(ctx, sst, h.Timestamp.WallTime)
311+
if err != nil {
312+
return result.Result{}, errors.Wrap(err, "computing SST stats")
313+
}
314+
stats.Add(sstStats)
315+
if !checkConflicts {
316+
stats.ContainsEstimates++
317+
}
357318
}
358-
359319
ms.Add(stats)
360320

361321
var mvccHistoryMutation *kvserverpb.ReplicatedEvalResult_MVCCHistoryMutation
@@ -505,6 +465,89 @@ func EvalAddSSTable(
505465
}, nil
506466
}
507467

468+
func computeSSTStatsDiffWithFallback(
469+
ctx context.Context,
470+
sst []byte,
471+
readWriter storage.ReadWriter,
472+
nowNanos int64,
473+
start, end storage.MVCCKey,
474+
) (enginepb.MVCCStats, error) {
475+
stats, err := storage.ComputeSSTStatsDiff(
476+
ctx, sst, readWriter, nowNanos, start, end)
477+
if errors.Is(err, storage.ComputeSSTStatsDiffReaderHasRangeKeys) {
478+
// Fall back to stats estimates if there are range keys in the engine.
479+
log.VEventf(ctx, 2, "computing SST stats as estimates after detecting range keys in engine")
480+
sstStats, err := computeSSTStats(ctx, sst, nowNanos)
481+
if err != nil {
482+
return enginepb.MVCCStats{}, errors.Wrap(err, "computing SST stats after detecting range keys in engine")
483+
}
484+
sstStats.ContainsEstimates = 1
485+
return sstStats, nil
486+
} else if err != nil {
487+
return enginepb.MVCCStats{}, err
488+
}
489+
return stats, nil
490+
}
491+
492+
func computeSSTStats(ctx context.Context, sst []byte, nowNanos int64) (enginepb.MVCCStats, error) {
493+
sstIter, err := storage.NewMemSSTIterator(sst, true /* verify */, storage.IterOptions{
494+
KeyTypes: storage.IterKeyTypePointsAndRanges,
495+
LowerBound: keys.MinKey,
496+
UpperBound: keys.MaxKey,
497+
})
498+
if err != nil {
499+
return enginepb.MVCCStats{}, err
500+
}
501+
defer sstIter.Close()
502+
503+
sstIter.SeekGE(storage.MVCCKey{Key: keys.MinKey})
504+
if ok, err := sstIter.Valid(); err != nil {
505+
return enginepb.MVCCStats{}, err
506+
} else if ok {
507+
// TODO(msbutler): this implies we tolerate ingesting an empty addstable.
508+
// Perhaps we should reject empty addstable requests?
509+
sstStats, err := storage.ComputeStatsForIter(sstIter, nowNanos)
510+
if err != nil {
511+
return enginepb.MVCCStats{}, errors.Wrap(err, "computing SSTable MVCC stats")
512+
}
513+
return sstStats, nil
514+
}
515+
return enginepb.MVCCStats{}, nil
516+
}
517+
518+
// checkSSTSpanBounds verifies that the keys in the sstable are within the
519+
// span specified by [start, end].
520+
func checkSSTSpanBounds(ctx context.Context, sst []byte, start, end storage.MVCCKey) error {
521+
sstIter, err := storage.NewMemSSTIterator(sst, true /* verify */, storage.IterOptions{
522+
KeyTypes: storage.IterKeyTypePointsAndRanges,
523+
LowerBound: keys.MinKey,
524+
UpperBound: keys.MaxKey,
525+
})
526+
if err != nil {
527+
return err
528+
}
529+
defer sstIter.Close()
530+
531+
// Check that the first key is in the expected span.
532+
sstIter.SeekGE(storage.MVCCKey{Key: keys.MinKey})
533+
if ok, err := sstIter.Valid(); err != nil {
534+
return err
535+
} else if ok {
536+
if unsafeKey := sstIter.UnsafeKey(); unsafeKey.Less(start) {
537+
return errors.Errorf("first key %s not in request range [%s,%s)",
538+
unsafeKey.Key, start.Key, end.Key)
539+
}
540+
}
541+
sstIter.SeekGE(end)
542+
if ok, err := sstIter.Valid(); err != nil {
543+
return err
544+
} else if ok {
545+
return errors.Errorf("last key %s not in request range [%s,%s)",
546+
sstIter.UnsafeKey(), start.Key, end.Key)
547+
}
548+
return err
549+
}
550+
508551
// assertSSTContents checks that the SST contains expected inputs:
509552
//
510553
// * Only SST set operations (not explicitly verified).

0 commit comments

Comments
 (0)