Skip to content

Commit c8c53ab

Browse files
committed
internal/base: introduce KVMeta, iterators
Informs: #5466
1 parent 97bcce6 commit c8c53ab

File tree

9 files changed

+219
-0
lines changed

9 files changed

+219
-0
lines changed

internal/base/internal.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,29 @@ type InternalKV struct {
626626
V InternalValue
627627
}
628628

629+
// TieringAttribute is the type of the user-specified tiering attribute that
// KVMeta carries for a key-value pair.
type TieringAttribute uint64
630+
631+
// KVMeta describes optional metadata associated with an `InternalKV`.
632+
// It's currently produced only by sstable-backed iterators and is not embedded
633+
// within `InternalKV` to avoid overhead on the common iteration path.
634+
// Instead, select iterators expose methods that return the metadata alongside
635+
// the key/value:
636+
// - `levelIter.FirstWithMeta` / `levelIter.NextWithMeta`
637+
// - sstable iterators' `FirstWithMeta` / `NextWithMeta`
638+
//
639+
// These methods exist to support compaction-only logic (e.g., `compact.Iter`).
640+
// Regular iteration should use the standard methods that do not surface metadata.
641+
type KVMeta struct {
642+
// TieringSpanID is a numeric identifier stored next to the tiering attribute.
// NOTE(review): presumably identifies the tiering span the key-value pair
// belongs to; its exact semantics are not documented here — confirm.
TieringSpanID uint64
643+
// TieringAttribute is a user-specified attribute for the key-value pair.
644+
//
645+
// TODO(sumeer): For CockroachDB decide on units for this attribute, which
646+
// will be a timestamp, since unix nanos is unnecessarily large.
647+
// log2(24*365*100) = 19.74, i.e., number of hours in 100 years fits in 3
648+
// bytes.
649+
TieringAttribute TieringAttribute
650+
}
651+
629652
// Kind returns the KV's internal key kind.
630653
func (kv *InternalKV) Kind() InternalKeyKind {
631654
return kv.K.Kind()

internal/base/iterator.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,13 @@ func (s *InternalIteratorStats) SafeFormat(p redact.SafePrinter, verb rune) {
517517
}
518518
}
519519

520+
// MetaDecoder is an optional interface that can be implemented by iterators
521+
// to provide metadata about the current key-value pair.
522+
type MetaDecoder interface {
523+
// DecodeMeta returns metadata for the current iterator position.
// NOTE(review): the result for an unpositioned or exhausted iterator is not
// specified here; presumably callers should only rely on it after a
// positioning call returned a non-nil KV — confirm.
524+
DecodeMeta() KVMeta
525+
}
526+
520527
// IteratorDebug is an interface implemented by all internal iterators and
521528
// fragment iterators.
522529
type IteratorDebug interface {

internal/compact/iterator.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@ import (
1818
"github.com/cockroachdb/redact"
1919
)
2020

21+
// MetaIterator is an interface for iterators that support metadata extraction.
// It extends base.InternalIterator with positioning methods that also return
// the base.KVMeta associated with the returned key/value pair.
22+
type MetaIterator interface {
23+
base.InternalIterator
// FirstWithMeta moves the iterator to the first key/value pair and returns
// it together with its metadata.
24+
FirstWithMeta() (*base.InternalKV, base.KVMeta)
// NextWithMeta moves the iterator to the next key/value pair and returns
// it together with its metadata.
25+
NextWithMeta() (*base.InternalKV, base.KVMeta)
26+
}
27+
2128
// Iter provides a forward-only iterator that encapsulates the logic for
2229
// collapsing entries during compaction. It wraps an internal iterator and
2330
// collapses entries that are no longer necessary because they are shadowed by

level_iter.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,37 @@ func (l *levelIter) First() *base.InternalKV {
729729
return l.verify(l.skipEmptyFileForward())
730730
}
731731

732+
// FirstWithMeta moves the iterator to the first key/value pair and returns
733+
// both the key/value and the associated metadata. This method is used by
734+
// compaction iterators that need access to tiering metadata without adding
735+
// overhead to the common iteration path.
736+
func (l *levelIter) FirstWithMeta() (*base.InternalKV, base.KVMeta) {
737+
return l.First(), l.extractMetaFromCurrentPosition()
738+
}
739+
740+
// NextWithMeta moves the iterator to the next key/value pair and returns
741+
// both the key/value and the associated metadata. This method is used by
742+
// compaction iterators that need access to tiering metadata without adding
743+
// overhead to the common iteration path.
744+
func (l *levelIter) NextWithMeta() (*base.InternalKV, base.KVMeta) {
745+
return l.Next(), l.extractMetaFromCurrentPosition()
746+
}
747+
748+
// extractMetaFromCurrentPosition extracts KVMeta from the current iterator
749+
// position. This method delegates to the underlying iterator if it supports the
750+
// specialized methods.
751+
func (l *levelIter) extractMetaFromCurrentPosition() base.KVMeta {
752+
if l.iter == nil {
753+
return base.KVMeta{}
754+
}
755+
756+
if metaDecoder, ok := l.iter.(base.MetaDecoder); ok {
757+
return metaDecoder.DecodeMeta()
758+
}
759+
760+
return base.KVMeta{}
761+
}
762+
732763
func (l *levelIter) Last() *base.InternalKV {
733764
if invariants.Enabled && l.upper != nil {
734765
panic(errors.AssertionFailedf("levelIter Last called while upper bound %q is set", l.upper))

sstable/colblk/data_block.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -993,6 +993,7 @@ func (v *DataBlockValidator) Validate(
993993
}
994994

995995
var _ blockiter.Data = (*DataBlockIter)(nil)
996+
var _ base.MetaDecoder = (*DataBlockIter)(nil)
996997

997998
// DataBlockIter iterates over a columnar data block.
998999
type DataBlockIter struct {
@@ -1267,6 +1268,33 @@ func (i *DataBlockIter) First() *base.InternalKV {
12671268
return i.decodeRow()
12681269
}
12691270

1271+
// FirstWithMeta moves the iterator to the first key/value pair and returns
1272+
// both the key/value and the associated metadata. This method is used by
1273+
// compaction iterators that need access to tiering metadata without adding
1274+
// overhead to the common iteration path.
1275+
func (i *DataBlockIter) FirstWithMeta() (*base.InternalKV, base.KVMeta) {
1276+
return i.First(), i.decodeMeta()
1277+
}
1278+
1279+
// NextWithMeta moves the iterator to the next key/value pair and returns
1280+
// both the key/value and the associated metadata. This method is used by
1281+
// compaction iterators that need access to tiering metadata without adding
1282+
// overhead to the common iteration path.
1283+
func (i *DataBlockIter) NextWithMeta() (*base.InternalKV, base.KVMeta) {
1284+
return i.Next(), i.decodeMeta()
1285+
}
1286+
1287+
// decodeMeta extracts the KVMeta for the current row.
1288+
func (i *DataBlockIter) decodeMeta() base.KVMeta {
1289+
// TODO: Implement tiering metadata extraction when the fields are available
1290+
return base.KVMeta{}
1291+
}
1292+
1293+
// DecodeMeta implements the base.MetaDecoder interface.
1294+
func (i *DataBlockIter) DecodeMeta() base.KVMeta {
1295+
return i.decodeMeta()
1296+
}
1297+
12701298
// Last implements the base.InternalIterator interface.
12711299
func (i *DataBlockIter) Last() *base.InternalKV {
12721300
if i.d == nil {

sstable/reader_iter.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
// https://go.googlesource.com/proposal/+/refs/heads/master/design/43651-type-parameters.md#pointer-method-example
2626
type dataBlockIterator[D any] interface {
2727
blockiter.Data
28+
// base.MetaDecoder is part of the constraint so that the sstable iterators
// can call DecodeMeta on their data block iterator directly, without a
// runtime type assertion.
base.MetaDecoder
2829

2930
*D // non-interface type constraint element
3031
}

sstable/reader_iter_single_lvl.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,6 +1226,33 @@ func (i *singleLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
12261226
return i.firstInternal()
12271227
}
12281228

1229+
// FirstWithMeta moves the iterator to the first key/value pair and returns
1230+
// both the key/value and the associated metadata. This method is used by
1231+
// compaction iterators that need access to tiering metadata without adding
1232+
// overhead to the common iteration path.
1233+
func (i *singleLevelIterator[I, PI, D, PD]) FirstWithMeta() (*base.InternalKV, base.KVMeta) {
1234+
return i.First(), i.extractMetaFromCurrentPosition()
1235+
}
1236+
1237+
// NextWithMeta moves the iterator to the next key/value pair and returns
1238+
// both the key/value and the associated metadata. This method is used by
1239+
// compaction iterators that need access to tiering metadata without adding
1240+
// overhead to the common iteration path.
1241+
func (i *singleLevelIterator[I, PI, D, PD]) NextWithMeta() (*base.InternalKV, base.KVMeta) {
1242+
return i.Next(), i.extractMetaFromCurrentPosition()
1243+
}
1244+
1245+
// extractMetaFromCurrentPosition extracts KVMeta from the current iterator position.
1246+
// This method delegates to the underlying data block iterator if it supports
1247+
// the specialized methods.
1248+
func (i *singleLevelIterator[I, PI, D, PD]) extractMetaFromCurrentPosition() base.KVMeta {
1249+
if PD(&i.data).IsDataInvalidated() {
1250+
return base.KVMeta{}
1251+
}
1252+
1253+
return PD(&i.data).DecodeMeta()
1254+
}
1255+
12291256
// firstInternal is a helper used for absolute positioning in a single-level
12301257
// index file, or for positioning in the second-level index in a two-level
12311258
// index file. For the latter, one cannot make any claims about absolute

sstable/reader_iter_two_lvl.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -856,6 +856,95 @@ func (i *twoLevelIterator[I, PI, D, PD]) First() *base.InternalKV {
856856
return i.skipForward()
857857
}
858858

859+
// FirstWithMeta moves the iterator to the first key/value pair and returns
860+
// both the key/value and the associated metadata. This method is used by
861+
// compaction iterators that need access to tiering metadata without adding
862+
// overhead to the common iteration path.
863+
func (i *twoLevelIterator[I, PI, D, PD]) FirstWithMeta() (*base.InternalKV, base.KVMeta) {
864+
i.lastOpWasSeekPrefixGE.Set(false)
865+
// The synthetic key is no longer relevant and must be cleared.
866+
i.secondLevel.synthetic.atSyntheticKey = false
867+
868+
// If we have a lower bound, use SeekGE. Note that in general this is not
869+
// supported usage, except when the lower bound is there because the table is
870+
// virtual.
871+
if i.secondLevel.lower != nil {
872+
kv := i.SeekGE(i.secondLevel.lower, base.SeekGEFlagsNone)
873+
if kv == nil {
874+
return nil, base.KVMeta{}
875+
}
876+
meta := i.extractMetaFromCurrentPosition()
877+
return kv, meta
878+
}
879+
i.secondLevel.exhaustedBounds = 0
880+
i.secondLevel.err = nil // clear cached iteration error
881+
// Seek optimization only applies until iterator is first positioned after SetBounds.
882+
i.secondLevel.boundsCmp = 0
883+
884+
if !i.ensureTopLevelIndexLoaded() {
885+
return nil, base.KVMeta{}
886+
}
887+
888+
if !PI(&i.topLevelIndex).First() {
889+
return nil, base.KVMeta{}
890+
}
891+
result := i.loadSecondLevelIndexBlock(+1)
892+
if result == loadBlockFailed {
893+
return nil, base.KVMeta{}
894+
}
895+
if result == loadBlockOK {
896+
if ikv := i.secondLevel.First(); ikv != nil {
897+
meta := i.extractMetaFromCurrentPosition()
898+
return ikv, meta
899+
}
900+
// Else fall through to skipForward.
901+
} else {
902+
// result == loadBlockIrrelevant. Enforce the upper bound here since
903+
// don't want to bother moving to the next entry in the top level index
904+
// if upper bound is already exceeded. Note that the next entry starts
905+
// with keys >= topLevelIndex.Separator() since even though this is the
906+
// block separator, the same user key can span multiple index blocks.
907+
// If upper is exclusive we pass orEqual=true below, else we require the
908+
// separator to be strictly greater than upper.
909+
if i.secondLevel.upper != nil && PI(&i.topLevelIndex).SeparatorGT(
910+
i.secondLevel.upper, !i.secondLevel.endKeyInclusive) {
911+
i.secondLevel.exhaustedBounds = +1
912+
}
913+
}
914+
// NB: skipForward checks whether exhaustedBounds is already +1.
915+
kv := i.skipForward()
916+
if kv == nil {
917+
return nil, base.KVMeta{}
918+
}
919+
meta := i.extractMetaFromCurrentPosition()
920+
return kv, meta
921+
}
922+
923+
// NextWithMeta moves the iterator to the next key/value pair and returns
924+
// both the key/value and the associated metadata. This method is used by
925+
// compaction iterators that need access to tiering metadata without adding
926+
// overhead to the common iteration path.
927+
func (i *twoLevelIterator[I, PI, D, PD]) NextWithMeta() (*base.InternalKV, base.KVMeta) {
928+
kv := i.Next()
929+
if kv == nil {
930+
return nil, base.KVMeta{}
931+
}
932+
meta := i.extractMetaFromCurrentPosition()
933+
return kv, meta
934+
}
935+
936+
// extractMetaFromCurrentPosition extracts KVMeta from the current iterator position.
937+
// This method delegates to the underlying second level iterator if it supports
938+
// the specialized methods.
939+
func (i *twoLevelIterator[I, PI, D, PD]) extractMetaFromCurrentPosition() base.KVMeta {
940+
if PD(&i.secondLevel.data).IsDataInvalidated() {
941+
return base.KVMeta{}
942+
}
943+
944+
// The dataBlockIterator constraint guarantees that PD(&i.secondLevel.data) implements MetaDecoder
945+
return PD(&i.secondLevel.data).DecodeMeta()
946+
}
947+
859948
// Last implements internalIterator.Last, as documented in the pebble
860949
// package. Note that Last only checks the lower bound. It is up to the caller
861950
// to ensure that key is less than the upper bound (e.g. via a call to

sstable/rowblk/rowblk_iter.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,12 @@ func (i *Iter) Error() error {
15731573
return nil // infallible
15741574
}
15751575

1576+
// DecodeMeta implements the base.MetaDecoder interface.
1577+
// Row-oriented blocks don't have tiering metadata, so this always returns empty metadata.
1578+
func (i *Iter) DecodeMeta() base.KVMeta {
1579+
return base.KVMeta{}
1580+
}
1581+
15761582
// Close implements internalIterator.Close, as documented in the pebble
15771583
// package.
15781584
func (i *Iter) Close() error {

0 commit comments

Comments
 (0)