Skip to content

Commit 92187e1

Browse files
committed
db: fix cases where SeekPrefixGE prefix doesn't match key
This commit adds `SeekPrefixGE` assertions verifying that the `prefix` is actually the prefix of the `key`. This was not always the case, so the offending code paths are adjusted. In the future, we should create a wrapper iterator that verifies this sort of thing. Informs #3794.
1 parent 80a5615 commit 92187e1

File tree

7 files changed

+98
-41
lines changed

7 files changed

+98
-41
lines changed

internal/base/comparer.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,12 @@ func (s Split) Prefix(k []byte) []byte {
159159
return k[:i:i]
160160
}
161161

162+
// HasPrefix returns true if the given key has the given prefix.
163+
func (s Split) HasPrefix(prefix, key []byte) bool {
164+
i := s(key)
165+
return bytes.Equal(prefix, key[:i:i])
166+
}
167+
162168
// DefaultSplit is a trivial implementation of Split which always returns the
163169
// full key.
164170
var DefaultSplit Split = func(key []byte) int { return len(key) }

iterator.go

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,23 +1476,18 @@ func (i *Iterator) SeekPrefixGE(key []byte) bool {
14761476
}
14771477
// Make a copy of the prefix so that modifications to the key after
14781478
// SeekPrefixGE returns do not affect the stored prefix.
1479-
if cap(i.prefixOrFullSeekKey) < prefixLen {
1480-
i.prefixOrFullSeekKey = make([]byte, prefixLen)
1481-
} else {
1482-
i.prefixOrFullSeekKey = i.prefixOrFullSeekKey[:prefixLen]
1483-
}
14841479
i.hasPrefix = true
1485-
copy(i.prefixOrFullSeekKey, keyPrefix)
1480+
i.prefixOrFullSeekKey = append(i.prefixOrFullSeekKey[:0], keyPrefix...)
14861481

14871482
if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 {
1488-
if p := i.comparer.Split.Prefix(lowerBound); !bytes.Equal(i.prefixOrFullSeekKey, p) {
1483+
if !i.comparer.Split.HasPrefix(i.prefixOrFullSeekKey, lowerBound) {
14891484
i.err = errors.New("pebble: SeekPrefixGE supplied with key outside of lower bound")
14901485
i.iterValidityState = IterExhausted
14911486
return false
14921487
}
14931488
key = lowerBound
14941489
} else if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 {
1495-
if p := i.comparer.Split.Prefix(upperBound); !bytes.Equal(i.prefixOrFullSeekKey, p) {
1490+
if !i.comparer.Split.HasPrefix(i.prefixOrFullSeekKey, upperBound) {
14961491
i.err = errors.New("pebble: SeekPrefixGE supplied with key outside of upper bound")
14971492
i.iterValidityState = IterExhausted
14981493
return false

level_iter.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,9 @@ func (l *levelIter) SeekGE(key []byte, flags base.SeekGEFlags) *base.InternalKV
655655
}
656656

657657
func (l *levelIter) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) *base.InternalKV {
658+
if invariants.Enabled && !l.split.HasPrefix(prefix, key) {
659+
panic(fmt.Sprintf("invalid prefix %q for key %q", prefix, key))
660+
}
658661
if invariants.Enabled && l.lower != nil && l.cmp(key, l.lower) < 0 {
659662
panic(errors.AssertionFailedf("levelIter SeekGE to key %q violates lower bound %q", key, l.lower))
660663
}

merging_iter.go

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -524,11 +524,9 @@ func (m *mergingIter) nextEntry(l *mergingIterLevel, succKey []byte) error {
524524
// P2. Care is taken to avoid ever advancing the iterator beyond the current
525525
// prefix. If nextEntry is ever invoked while we're already beyond the
526526
// current prefix, we're violating the invariant.
527-
if invariants.Enabled && m.prefix != nil {
528-
if p := m.split.Prefix(l.iterKV.K.UserKey); !bytes.Equal(m.prefix, p) {
529-
m.logger.Fatalf("mergingIter: prefix violation: nexting beyond prefix %q; existing heap root %q\n%s",
530-
m.prefix, l.iterKV, debug.Stack())
531-
}
527+
if invariants.Enabled && m.prefix != nil && !m.split.HasPrefix(m.prefix, l.iterKV.K.UserKey) {
528+
m.logger.Fatalf("mergingIter: prefix violation: nexting beyond prefix %q; existing heap root %q\n%s",
529+
m.prefix, l.iterKV, debug.Stack())
532530
}
533531

534532
oldTopLevel := l.index
@@ -905,6 +903,10 @@ func (m *mergingIter) findPrevEntry() *base.InternalKV {
905903
//
906904
// If an error occurs, seekGE returns the error without setting m.err.
907905
func (m *mergingIter) seekGE(key []byte, level int, flags base.SeekGEFlags) error {
906+
if invariants.Enabled && m.lower != nil && m.heap.cmp(key, m.lower) < 0 {
907+
m.logger.Fatalf("mergingIter: lower bound violation: %s < %s\n%s", key, m.lower, debug.Stack())
908+
}
909+
908910
// When seeking, we can use tombstones to adjust the key we seek to on each
909911
// level. Consider the series of range tombstones:
910912
//
@@ -957,15 +959,11 @@ func (m *mergingIter) seekGE(key []byte, level int, flags base.SeekGEFlags) erro
957959
}
958960

959961
for ; level < len(m.levels); level++ {
960-
if invariants.Enabled && m.lower != nil && m.heap.cmp(key, m.lower) < 0 {
961-
m.logger.Fatalf("mergingIter: lower bound violation: %s < %s\n%s", key, m.lower, debug.Stack())
962-
}
963-
964962
l := &m.levels[level]
965963
if m.prefix != nil {
966964
l.iterKV = l.iter.SeekPrefixGE(m.prefix, key, flags)
967965
if l.iterKV != nil {
968-
if !bytes.Equal(m.prefix, m.split.Prefix(l.iterKV.K.UserKey)) {
966+
if !m.split.HasPrefix(m.prefix, l.iterKV.K.UserKey) {
969967
// Prevent keys without a matching prefix from being added to the heap by setting
970968
// iterKV to nil before calling initMinHeap.
971969
l.iterKV = nil
@@ -999,7 +997,21 @@ func (m *mergingIter) seekGE(key []byte, level int, flags base.SeekGEFlags) erro
999997
// Based on the containment condition tombstone.End > key, so
1000998
// the assignment to key results in a monotonically
1001999
// non-decreasing key across iterations of this loop.
1002-
//
1000+
if m.prefix != nil && !m.split.HasPrefix(m.prefix, l.tombstone.End) {
1001+
// Any keys with m.prefix on subsequent levels are under the tombstone.
1002+
// We still need to perform the seeks, in case the next seek uses
1003+
// the TrySeekUsingNext flag.
1004+
for level++; level < len(m.levels); level++ {
1005+
l := &m.levels[level]
1006+
if kv := l.iter.SeekPrefixGE(m.prefix, key, flags); kv == nil {
1007+
if err := l.iter.Error(); err != nil {
1008+
return err
1009+
}
1010+
}
1011+
l.iterKV = nil
1012+
}
1013+
break
1014+
}
10031015
// The adjustment of key here can only move it to a larger key.
10041016
// Since the caller of seekGE guaranteed that the original key
10051017
// was greater than or equal to m.lower, the new key will
@@ -1037,17 +1049,18 @@ func (m *mergingIter) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) *
10371049
func (m *mergingIter) SeekPrefixGEStrict(
10381050
prefix, key []byte, flags base.SeekGEFlags,
10391051
) *base.InternalKV {
1052+
if invariants.Enabled && !m.split.HasPrefix(prefix, key) {
1053+
panic(fmt.Sprintf("invalid prefix %q for key %q", prefix, key))
1054+
}
10401055
m.prefix = prefix
10411056
m.err = m.seekGE(key, 0 /* start level */, flags)
10421057
if m.err != nil {
10431058
return nil
10441059
}
10451060

10461061
iterKV := m.findNextEntry()
1047-
if invariants.Enabled && iterKV != nil {
1048-
if !bytes.Equal(m.prefix, m.split.Prefix(iterKV.K.UserKey)) {
1049-
m.logger.Fatalf("mergingIter: prefix violation: returning key %q without prefix %q\n", iterKV, m.prefix)
1050-
}
1062+
if invariants.Enabled && iterKV != nil && !m.split.HasPrefix(m.prefix, iterKV.K.UserKey) {
1063+
m.logger.Fatalf("mergingIter: prefix violation: returning key %q without prefix %q\n", iterKV, m.prefix)
10511064
}
10521065
return iterKV
10531066
}

sstable/reader_iter_single_lvl.go

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,13 @@ type singleLevelIterator[D any, PD block.DataBlockIterator[D]] struct {
176176
// present, should be used for prefix seeks or not. In some cases it is
177177
// beneficial to skip a filter block even if it exists (eg. if probability of
178178
// a match is high).
179-
useFilterBlock bool
180-
lastBloomFilterMatched bool
179+
useFilterBlock bool
180+
181+
// didNotPositionOnLastSeekGE is set to true if we completed a call to SeekGE
182+
// or SeekPrefixGE without positioning the iterator internally. If this flag
183+
// is set, the TrySeekUsingNext optimization is disabled on the next seek.
184+
// This happens for example when the bloom filter excludes a prefix.
185+
didNotPositionOnLastSeekGE bool
181186

182187
transforms IterTransforms
183188

@@ -665,6 +670,11 @@ func (i *singleLevelIterator[D, PD]) SeekGE(key []byte, flags base.SeekGEFlags)
665670
key = i.lower
666671
}
667672
}
673+
if i.didNotPositionOnLastSeekGE {
674+
// Iterator is not positioned based on last seek.
675+
flags = flags.DisableTrySeekUsingNext()
676+
i.didNotPositionOnLastSeekGE = false
677+
}
668678

669679
if flags.TrySeekUsingNext() {
670680
// The i.exhaustedBounds comparison indicates that the upper bound was
@@ -817,6 +827,12 @@ func (i *singleLevelIterator[D, PD]) SeekPrefixGE(
817827
// TODO(bananabrick): We can optimize away this check for the level iter
818828
// if necessary.
819829
if i.cmp(key, i.lower) < 0 {
830+
if !i.reader.Split.HasPrefix(prefix, i.lower) {
831+
i.err = nil // clear any cached iteration error
832+
// Disable the TrySeekUsingNext optimization next time.
833+
i.didNotPositionOnLastSeekGE = true
834+
return nil
835+
}
820836
key = i.lower
821837
}
822838
}
@@ -826,18 +842,22 @@ func (i *singleLevelIterator[D, PD]) SeekPrefixGE(
826842
func (i *singleLevelIterator[D, PD]) seekPrefixGE(
827843
prefix, key []byte, flags base.SeekGEFlags,
828844
) (kv *base.InternalKV) {
845+
if invariants.Enabled && !i.reader.Split.HasPrefix(prefix, key) {
846+
panic(fmt.Sprintf("invalid prefix %q for key %q", prefix, key))
847+
}
848+
if i.didNotPositionOnLastSeekGE {
849+
// Iterator is not positioned based on last seek.
850+
flags = flags.DisableTrySeekUsingNext()
851+
i.didNotPositionOnLastSeekGE = false
852+
}
853+
829854
// NOTE: prefix is only used for bloom filter checking and not later work in
830855
// this method. Hence, we can use the existing iterator position if the last
831856
// SeekPrefixGE did not fail bloom filter matching.
832857

833858
err := i.err
834859
i.err = nil // clear cached iteration error
835860
if i.useFilterBlock {
836-
if !i.lastBloomFilterMatched {
837-
// Iterator is not positioned based on last seek.
838-
flags = flags.DisableTrySeekUsingNext()
839-
}
840-
i.lastBloomFilterMatched = false
841861
// Check prefix bloom filter.
842862
var mayContain bool
843863
mayContain, i.err = i.bloomFilterMayContain(prefix)
@@ -848,9 +868,10 @@ func (i *singleLevelIterator[D, PD]) seekPrefixGE(
848868
// since the caller was allowed to call Next when SeekPrefixGE returned
849869
// nil. This is no longer allowed.
850870
PD(&i.data).Invalidate()
871+
// Disable the TrySeekUsingNext optimization next time.
872+
i.didNotPositionOnLastSeekGE = true
851873
return nil
852874
}
853-
i.lastBloomFilterMatched = true
854875
}
855876
if flags.TrySeekUsingNext() {
856877
// The i.exhaustedBounds comparison indicates that the upper bound was

sstable/reader_iter_two_lvl.go

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,12 @@ type twoLevelIterator[D any, PD block.DataBlockIterator[D]] struct {
3232
// useFilterBlock controls whether we consult the bloom filter in the
3333
// twoLevelIterator code. Note that secondLevel.useFilterBlock is always
3434
// false - any filtering happens at the top level.
35-
useFilterBlock bool
36-
lastBloomFilterMatched bool
35+
useFilterBlock bool
36+
// didNotPositionOnLastSeekGE is set to true if we completed a call to SeekGE
37+
// or SeekPrefixGE without positioning the iterator internally. If this flag
38+
// is set, the TrySeekUsingNext optimization is disabled on the next seek.
39+
// This happens for example when the bloom filter excludes a prefix.
40+
didNotPositionOnLastSeekGE bool
3741
}
3842

3943
var _ Iterator = (*twoLevelIterator[rowblk.Iter, *rowblk.Iter])(nil)
@@ -371,9 +375,19 @@ func (i *twoLevelIterator[D, PD]) SeekPrefixGE(
371375
// TODO(bananabrick): We can optimize away this check for the level iter
372376
// if necessary.
373377
if i.secondLevel.cmp(key, i.secondLevel.lower) < 0 {
378+
if !i.secondLevel.reader.Split.HasPrefix(prefix, i.secondLevel.lower) {
379+
i.secondLevel.err = nil // clear any cached iteration error
380+
// Disable the TrySeekUsingNext optimization next time.
381+
i.didNotPositionOnLastSeekGE = true
382+
return nil
383+
}
374384
key = i.secondLevel.lower
375385
}
376386
}
387+
if i.didNotPositionOnLastSeekGE {
388+
flags = flags.DisableTrySeekUsingNext()
389+
i.didNotPositionOnLastSeekGE = false
390+
}
377391

378392
// NOTE: prefix is only used for bloom filter checking and not later work in
379393
// this method. Hence, we can use the existing iterator position if the last
@@ -385,8 +399,7 @@ func (i *twoLevelIterator[D, PD]) SeekPrefixGE(
385399
// The twoLevelIterator could be already exhausted. Utilize that when
386400
// trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
387401
// bounds-exhausted near the top of the file.
388-
filterUsedAndDidNotMatch := i.useFilterBlock && !i.lastBloomFilterMatched
389-
if flags.TrySeekUsingNext() && !filterUsedAndDidNotMatch &&
402+
if flags.TrySeekUsingNext() &&
390403
(i.secondLevel.exhaustedBounds == +1 || (PD(&i.secondLevel.data).IsDataInvalidated() && i.secondLevel.index.IsDataInvalidated())) &&
391404
err == nil {
392405
// Already exhausted, so return nil.
@@ -395,11 +408,6 @@ func (i *twoLevelIterator[D, PD]) SeekPrefixGE(
395408

396409
// Check prefix bloom filter.
397410
if i.useFilterBlock {
398-
if !i.lastBloomFilterMatched {
399-
// Iterator is not positioned based on last seek.
400-
flags = flags.DisableTrySeekUsingNext()
401-
}
402-
i.lastBloomFilterMatched = false
403411
var mayContain bool
404412
mayContain, i.secondLevel.err = i.secondLevel.bloomFilterMayContain(prefix)
405413
if i.secondLevel.err != nil || !mayContain {
@@ -409,9 +417,10 @@ func (i *twoLevelIterator[D, PD]) SeekPrefixGE(
409417
// since the caller was allowed to call Next when SeekPrefixGE returned
410418
// nil. This is no longer allowed.
411419
PD(&i.secondLevel.data).Invalidate()
420+
// Disable the TrySeekUsingNext optimization next time.
421+
i.didNotPositionOnLastSeekGE = true
412422
return nil
413423
}
414-
i.lastBloomFilterMatched = true
415424
}
416425

417426
// Bloom filter matches.

sstable/testdata/virtual_reader_iter

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ bounds: [dd#5,SET-ddd#6,SET]
7777
# Check lower bound enforcement during SeekPrefixGE.
7878
iter
7979
seek-prefix-ge d
80+
----
81+
.
82+
83+
iter
84+
seek-prefix-ge dd
8085
next
8186
next
8287
----
@@ -292,6 +297,11 @@ bounds: [dd#5,SET-ddd#6,SET]
292297
# Check lower bound enforcement during SeekPrefixGE.
293298
iter
294299
seek-prefix-ge d
300+
----
301+
.
302+
303+
iter
304+
seek-prefix-ge dd
295305
next
296306
next
297307
----

0 commit comments

Comments
 (0)