Skip to content

Commit f214915

Browse files
committed
storage: avoid eager MVCC scanner value retrieval
Previously pebbleMVCCScanner.getOne eagerly retrieved and decoded values of KVs it considered. If the scanner doesn't need to check for uncertainty, this was unnecessary. Additionally, future work may avoid the value retrieval even when performing uncertainty checks if the value is known to not encode a local timestamp. Epic: none Informs #144715. Release note: none
1 parent 86c4728 commit f214915

File tree

1 file changed

+55
-31
lines changed

1 file changed

+55
-31
lines changed

pkg/storage/pebble_mvcc_scanner.go

Lines changed: 55 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -452,15 +452,16 @@ type pebbleMVCCScanner struct {
452452
// cur* variables store the "current" record we're pointing to. Updated in
453453
// updateCurrent. Note that the timestamp can be clobbered in the case of
454454
// adding an intent from the intent history but is otherwise meaningful.
455-
curUnsafeKey MVCCKey
456-
curRawKey []byte
457-
curUnsafeValue MVCCValue
458-
curRawValue pebble.LazyValue
459-
curRangeKeys MVCCRangeKeyStack
460-
savedRangeKeys MVCCRangeKeyStack
461-
savedRangeKeyVers MVCCRangeKeyVersion
462-
results results
463-
intents pebble.Batch
455+
curUnsafeKey MVCCKey
456+
curRawKey []byte
457+
curUnsafeValue MVCCValue
458+
curRawValue pebble.LazyValue
459+
curRawValueFetched []byte
460+
curRangeKeys MVCCRangeKeyStack
461+
savedRangeKeys MVCCRangeKeyStack
462+
savedRangeKeyVers MVCCRangeKeyVersion
463+
results results
464+
intents pebble.Batch
464465
// mostRecentTS stores the largest timestamp observed that is equal to or
465466
// above the scan timestamp. Only applicable if failOnMoreRecent is true. If
466467
// set and no other error is hit, a WriteToOld error will be returned from
@@ -832,31 +833,14 @@ func (p *pebbleMVCCScanner) getOne(ctx context.Context) (ok, added bool) {
832833
return p.addSynthetic(ctx, p.curUnsafeKey.Key, rkv)
833834
}
834835

835-
// We are eagerly fetching and decoding the value, even though it may be
836-
// too recent. With some care, this could be optimized to be lazy.
837-
v, valid := p.getFromLazyValue()
838-
if !valid {
839-
return false, false
840-
}
841-
842-
uncertaintyCheckRequired := p.checkUncertainty && !p.curUnsafeKey.Timestamp.LessEq(p.ts)
843-
if !p.mvccHeaderRequired(uncertaintyCheckRequired) {
844-
if !p.decodeCurrentValueIgnoringHeader(v) {
845-
return false, false
846-
}
847-
} else if extended, valid := p.tryDecodeCurrentValueSimple(v); !valid {
848-
return false, false
849-
} else if extended {
850-
if !p.decodeCurrentValueExtended(v) {
851-
return false, false
852-
}
853-
}
854-
855836
// ts < read_ts
856837
if p.curUnsafeKey.Timestamp.Less(p.ts) {
857838
// 1. Fast path: there is no intent and our read timestamp is newer
858839
// than the most recent version's timestamp.
859-
return p.add(ctx, p.curUnsafeKey.Key, p.curRawKey, p.curUnsafeValue.Value.RawBytes, v)
840+
if !p.decodeCurrentValue(p.decodeMVCCHeaders) {
841+
return false, false
842+
}
843+
return p.add(ctx, p.curUnsafeKey.Key, p.curRawKey, p.curUnsafeValue.Value.RawBytes, p.curRawValueFetched)
860844
}
861845

862846
// ts == read_ts
@@ -889,7 +873,10 @@ func (p *pebbleMVCCScanner) getOne(ctx context.Context) (ok, added bool) {
889873

890874
// 3. There is no intent and our read timestamp is equal to the most
891875
// recent version's timestamp.
892-
return p.add(ctx, p.curUnsafeKey.Key, p.curRawKey, p.curUnsafeValue.Value.RawBytes, v)
876+
if !p.decodeCurrentValue(p.decodeMVCCHeaders) {
877+
return false, false
878+
}
879+
return p.add(ctx, p.curUnsafeKey.Key, p.curRawKey, p.curUnsafeValue.Value.RawBytes, p.curRawValueFetched)
893880
}
894881

895882
// ts > read_ts
@@ -923,6 +910,9 @@ func (p *pebbleMVCCScanner) getOne(ctx context.Context) (ok, added bool) {
923910
// 5. Our txn's read timestamp is less than the max timestamp
924911
// seen by the txn. We need to check for clock uncertainty
925912
// errors.
913+
if !p.decodeCurrentValue(true /* requireHeader */) {
914+
return false, false
915+
}
926916
localTS := p.curUnsafeValue.GetLocalTimestamp(p.curUnsafeKey.Timestamp)
927917
if p.uncertainty.IsUncertain(p.curUnsafeKey.Timestamp, localTS) {
928918
return p.uncertaintyError(p.curUnsafeKey.Timestamp, localTS), false
@@ -1261,6 +1251,16 @@ func IncludeStartKeyIntoErr(startKey roachpb.Key, err error) error {
12611251
func (p *pebbleMVCCScanner) add(
12621252
ctx context.Context, key roachpb.Key, rawKey []byte, rawValue []byte, mvccRawBytes []byte,
12631253
) (ok, added bool) {
1254+
// TODO(jackson): In some cases, the current code is still too eager with
1255+
// value retrieval and decoding the value into a MVCCValue:
1256+
//
1257+
// - If p.skipLocked, we could check if the key is locked and skip over the
1258+
// KV without ever retrieving the value. It's unclear how common this is.
1259+
// - If p.rawMVCCValues is true, we could skip decoding the value into a
1260+
// MVCCValue. If p.tombstones is true, we would still need to use the
1261+
// iterator's MVCCValueLenAndIsTombstone() method to determine if the
1262+
// value is a tombstone we should skip over.
1263+
12641264
// Don't include deleted versions len(val) == 0, unless we've been instructed
12651265
// to include tombstones in the results.
12661266
if len(rawValue) == 0 && !p.tombstones {
@@ -1601,9 +1601,33 @@ func (p *pebbleMVCCScanner) getFromLazyValue() (v []byte, valid bool) {
16011601
if callerOwned {
16021602
p.lazyValueBuf = v[:0]
16031603
}
1604+
p.curRawValueFetched = v
16041605
return v, true
16051606
}
16061607

1608+
func (p *pebbleMVCCScanner) decodeCurrentValue(requireHeader bool) (valid bool) {
1609+
v, callerOwned, err := p.curRawValue.Value(p.lazyValueBuf)
1610+
if err != nil {
1611+
p.err = err
1612+
return false
1613+
}
1614+
if callerOwned {
1615+
p.lazyValueBuf = v[:0]
1616+
}
1617+
p.curRawValueFetched = v
1618+
1619+
if !requireHeader {
1620+
return p.decodeCurrentValueIgnoringHeader(p.curRawValueFetched)
1621+
}
1622+
extended, valid := p.tryDecodeCurrentValueSimple(p.curRawValueFetched)
1623+
if !valid {
1624+
return false
1625+
} else if !extended {
1626+
return true
1627+
}
1628+
return p.decodeCurrentValueExtended(p.curRawValueFetched)
1629+
}
1630+
16071631
func (p *pebbleMVCCScanner) decodeCurrentMetadata() bool {
16081632
val, valid := p.getFromLazyValue()
16091633
if !valid {

0 commit comments

Comments
 (0)