@@ -11,23 +11,26 @@ import (
1111 "sync"
1212 "unsafe"
1313
14+ "github.com/cockroachdb/errors"
1415 "github.com/cockroachdb/pebble/internal/base"
1516 "github.com/cockroachdb/pebble/internal/invariants"
1617 "github.com/cockroachdb/pebble/internal/keyspan"
1718)
1819
1920// Block properties are an optional user-facing feature that can be used to
20- // filter data blocks (and whole sstables) from an Iterator before they are
21- // loaded. They do not apply to range delete blocks. These are expected to
22- // very concisely represent a set of some attribute value contained within the
23- // key or value, such that the set includes all the attribute values in the
24- // block. This has some similarities with OLAP pruning approaches that
25- // maintain min-max attribute values for some column (which concisely
26- // represent a set), that is then used to prune at query time. In Pebble's
27- // case, data blocks are small, typically 25-50KB, so these properties should
28- // reduce their precision in order to be concise -- a good rule of thumb is to
29- // not consume more than 50-100 bytes across all properties maintained for a
30- // block, i.e., a 500x reduction compared to loading the data block.
21+ // filter data blocks, index blocks, or whole sstables from an Iterator before
22+ // they are loaded.
23+ //
24+ // Block properties are expected to very concisely represent a set of some
25+ // attribute value contained within the key or value, such that the set includes
26+ // all the attribute values in the block. This has some similarities with OLAP
27+ // pruning approaches that maintain min-max attribute values for some column
28+ // (which concisely represent a set), that is then used to prune at query time.
29+ // In Pebble's case, data blocks are small, typically 25-50KB, so these
30+ // properties should reduce their precision in order to be concise -- a good
31+ // rule of thumb is to not consume more than 50-100 bytes across all properties
32+ // maintained for a block, i.e., a 500x reduction compared to loading the data
33+ // block.
3134//
3235// A block property must be assigned a unique name, which is encoded and
3336// stored in the sstable. This name must be unique among all user-properties
@@ -37,17 +40,12 @@ import (
3740// considered semantically identical. The caller is free to choose the
3841// semantics of an empty byte slice e.g. they could use it to represent the
3942// empty set or the universal set, whichever they think is more common and
40- // therefore better to encode more concisely. The serialization of the
41- // property for the various Finish*() calls in a BlockPropertyCollector
42- // implementation should be identical, since the corresponding
43- // BlockPropertyFilter implementation is not told the context in which it is
44- // deserializing the property.
43+ // therefore better to encode more concisely.
4544//
46- // Block properties are more general than table properties and should be
47- // preferred over using table properties. A BlockPropertyCollector can achieve
48- // identical behavior to table properties by returning the nil slice from
49- // FinishDataBlock and FinishIndexBlock, and interpret them as the universal
50- // set in BlockPropertyFilter, and return a non-universal set in FinishTable.
45+ // Block properties are hierarchical: the properties for an index block must be
46+ // derivable just from the properties of the data blocks it contains. Similarly,
47+ // the table properties must be derivable just from the properties of the index
48+ // blocks and the range key block.
5149//
5250// Block property filtering is nondeterministic because the separation of keys
5351// into blocks is nondeterministic. Clients use block-property filters to
@@ -83,19 +81,10 @@ import (
8381// compactions. If Pebble is configured with such value separation, block
8482// properties must only apply to the key, and will be provided a nil value.
8583
86- // BlockPropertyCollector is used when writing a sstable.
87- //
88- // - All calls to Add are included in the next FinishDataBlock, after which
89- // the next data block is expected to start.
90- //
91- // - The index entry generated for the data block, which contains the return
92- // value from FinishDataBlock, is not immediately included in the current
93- // index block. It is included when AddPrevDataBlockToIndexBlock is called.
94- // An alternative would be to return an opaque handle from FinishDataBlock
95- // and pass it to a new AddToIndexBlock method, which requires more
96- // plumbing, and passing of an interface{} results in a undesirable heap
97- // allocation. AddPrevDataBlockToIndexBlock must be called before keys are
98- // added to the new data block.
84+ // BlockPropertyCollector is used when writing a sstable. Multiple
85+ // BlockPropertyCollector instances are used for each property, according to the
86+ // various levels (data/range blocks, index blocks, table). The lowest levels
87+ // use AddPointKey()/AddRangeKeys() while the other levels use AddCollected().
9988type BlockPropertyCollector interface {
10089 // Name returns the name of the block property collector.
10190 Name () string
@@ -104,16 +93,20 @@ type BlockPropertyCollector interface {
10493 // sstable. The callee can assume that these are in sorted order.
10594 AddPointKey (key InternalKey , value []byte ) error
10695
107- // AddRangeKeys is called for each range span added to the sstable. The range
108- // key properties are stored separately and don't contribute to data block
109- // properties. They are only used when FinishTable is called.
110- // TODO(radu): clean up this subtle semantic.
96+ // AddRangeKeys is called for each range span added to a range key block in
97+ // the sstable. The callee can assume these are fragmented and in sorted
98+ // order.
11199 AddRangeKeys (span keyspan.Span ) error
112100
101+ // AddCollected adds previously collected property data. For example, when
102+ // calculating properties for index blocks, AddCollected is called with the
103+ // results of Finish for each data block.
104+ AddCollected (prop []byte ) error
105+
113106 // AddCollectedWithSuffixReplacement adds previously collected property data
114- // and updates it to reflect a change of suffix on all keys: the old property
115- // data is assumed to be constructed from keys that all have the same
116- // oldSuffix and is recalculated to reflect the same keys but with newSuffix.
107+ // after updating to reflect a change of suffix on all keys: the property data
108+ // is recalculated to reflect the same keys it was computed from but with
109+ // newSuffix.
117110 //
118111 // A collector which supports this method must be able to derive its updated
119112 // value from its old value and the change being made to the suffix, without
@@ -129,29 +122,15 @@ type BlockPropertyCollector interface {
129122 // This method is optional (if it is not implemented, it always returns an
130123 // error). SupportsSuffixReplacement() can be used to check if this method is
131124 // implemented.
132- AddCollectedWithSuffixReplacement (oldProp []byte , oldSuffix , newSuffix []byte ) error
125+ AddCollectedWithSuffixReplacement (oldProp []byte , newSuffix []byte ) error
133126
134127 // SupportsSuffixReplacement returns whether the collector supports the
135128 // AddCollectedWithSuffixReplacement method.
136129 SupportsSuffixReplacement () bool
137130
138- // FinishDataBlock is called when all the entries have been added to a
139- // data block. Subsequent Add calls will be for the next data block. It
140- // returns the property value for the finished block.
141- FinishDataBlock (buf []byte ) ([]byte , error )
142-
143- // AddPrevDataBlockToIndexBlock adds the entry corresponding to the
144- // previous FinishDataBlock to the current index block.
145- AddPrevDataBlockToIndexBlock ()
146-
147- // FinishIndexBlock is called when an index block, containing all the
148- // key-value pairs since the last FinishIndexBlock, will no longer see new
149- // entries. It returns the property value for the index block.
150- FinishIndexBlock (buf []byte ) ([]byte , error )
151-
152- // FinishTable is called when the sstable is finished, and returns the
153- // property value for the sstable.
154- FinishTable (buf []byte ) ([]byte , error )
131+ // Finish appends the property value to buf and resets the collector to an
132+ // empty state.
133+ Finish (buf []byte ) []byte
155134}
156135
157136// BlockPropertyFilter is used in an Iterator to filter sstables and blocks
@@ -233,9 +212,7 @@ type BlockIntervalCollector struct {
233212 mapper IntervalMapper
234213 suffixReplacer BlockIntervalSuffixReplacer
235214
236- blockInterval BlockInterval
237- indexInterval BlockInterval
238- tableInterval BlockInterval
215+ interval BlockInterval
239216}
240217
241218var _ BlockPropertyCollector = & BlockIntervalCollector {}
@@ -291,7 +268,7 @@ func (b *BlockIntervalCollector) AddPointKey(key InternalKey, value []byte) erro
291268 if err != nil {
292269 return err
293270 }
294- b .blockInterval .UnionWith (interval )
271+ b .interval .UnionWith (interval )
295272 return nil
296273}
297274
@@ -306,14 +283,27 @@ func (b *BlockIntervalCollector) AddRangeKeys(span Span) error {
306283 }
307284 // Range keys are unioned into this collector's single interval, the same
308285 // way point keys are; a separate collector instance is used for each level.
309- b .tableInterval .UnionWith (interval )
286+ b .interval .UnionWith (interval )
287+ return nil
288+ }
289+
290+ // AddCollected is part of the BlockPropertyCollector interface.
291+ func (b * BlockIntervalCollector ) AddCollected (prop []byte ) error {
292+ i , err := decodeBlockInterval (prop )
293+ if err != nil {
294+ return err
295+ }
296+ b .interval .UnionWith (i )
310297 return nil
311298}
312299
313300// AddCollectedWithSuffixReplacement is part of the BlockPropertyCollector interface.
314301func (b * BlockIntervalCollector ) AddCollectedWithSuffixReplacement (
315- oldProp []byte , oldSuffix , newSuffix []byte ,
302+ oldProp []byte , newSuffix []byte ,
316303) error {
304+ if b .suffixReplacer == nil {
305+ return errors .Errorf ("%s does not support suffix replacement" , b .name )
306+ }
317307 i , err := decodeBlockInterval (oldProp )
318308 if err != nil {
319309 return err
@@ -322,7 +312,7 @@ func (b *BlockIntervalCollector) AddCollectedWithSuffixReplacement(
322312 if err != nil {
323313 return err
324314 }
325- b .blockInterval .UnionWith (i )
315+ b .interval .UnionWith (i )
326316 return nil
327317}
328318
@@ -331,30 +321,11 @@ func (b *BlockIntervalCollector) SupportsSuffixReplacement() bool {
331321 return b .suffixReplacer != nil
332322}
333323
334- // FinishDataBlock is part of the BlockPropertyCollector interface.
335- func (b * BlockIntervalCollector ) FinishDataBlock (buf []byte ) ([]byte , error ) {
336- buf = encodeBlockInterval (b .blockInterval , buf )
337- b .tableInterval .UnionWith (b .blockInterval )
338- return buf , nil
339- }
340-
341- // AddPrevDataBlockToIndexBlock implements the BlockPropertyCollector
342- // interface.
343- func (b * BlockIntervalCollector ) AddPrevDataBlockToIndexBlock () {
344- b .indexInterval .UnionWith (b .blockInterval )
345- b .blockInterval = BlockInterval {}
346- }
347-
348- // FinishIndexBlock implements the BlockPropertyCollector interface.
349- func (b * BlockIntervalCollector ) FinishIndexBlock (buf []byte ) ([]byte , error ) {
350- buf = encodeBlockInterval (b .indexInterval , buf )
351- b .indexInterval = BlockInterval {}
352- return buf , nil
353- }
354-
355- // FinishTable implements the BlockPropertyCollector interface.
356- func (b * BlockIntervalCollector ) FinishTable (buf []byte ) ([]byte , error ) {
357- return encodeBlockInterval (b .tableInterval , buf ), nil
324+ // Finish is part of the BlockPropertyCollector interface.
325+ func (b * BlockIntervalCollector ) Finish (buf []byte ) []byte {
326+ result := encodeBlockInterval (b .interval , buf )
327+ b .interval = BlockInterval {}
328+ return result
358329}
359330
360331// BlockInterval represents the [Lower, Upper) interval of 64-bit values
0 commit comments