diff --git a/compaction.go b/compaction.go index e44d3d6a30..f6409344af 100644 --- a/compaction.go +++ b/compaction.go @@ -3479,16 +3479,11 @@ func (d *DB) compactAndWrite( writerOpts.Compression = block.FastestCompression } vSep := valueSeparation - switch spanPolicy.ValueStoragePolicy.PolicyAdjustment { - case UseDefaultValueStorage: - // No change to value separation. - case NoValueSeparation: + if spanPolicy.ValueStoragePolicy.DisableBlobSeparation { vSep = valsep.NeverSeparateValues{} - case OverrideValueStorage: - // This span of keyspace is more tolerant of latency, so set a more - // aggressive value separation policy for this output. + } else if spanPolicy.ValueStoragePolicy.ContainsOverrides() { vSep.SetNextOutputConfig(valsep.ValueSeparationOutputConfig{ - MinimumSize: spanPolicy.ValueStoragePolicy.MinimumSize, + MinimumSize: spanPolicy.ValueStoragePolicy.OverrideBlobSeparationMinimumSize, DisableValueSeparationBySuffix: spanPolicy.ValueStoragePolicy.DisableSeparationBySuffix, }) } diff --git a/compaction_test.go b/compaction_test.go index 29f6aba21e..fef1ba1d89 100644 --- a/compaction_test.go +++ b/compaction_test.go @@ -1530,10 +1530,6 @@ func runCompactionTest( if len(parts) != 1 { td.Fatalf(t, "expected disable-separation-by-suffix with no value, got: %s", arg) } - if policy.ValueStoragePolicy.PolicyAdjustment == NoValueSeparation { - td.Fatalf(t, "conflicting value storage policies for span: %s", line) - } - policy.ValueStoragePolicy.PolicyAdjustment = OverrideValueStorage policy.ValueStoragePolicy.DisableSeparationBySuffix = true case "val-sep-minimum-size": if len(parts) != 2 { @@ -1541,13 +1537,11 @@ func runCompactionTest( } size, err := strconv.ParseUint(parts[1], 10, 64) if err != nil { - td.Fatalf(t, "parsing value-minimum-size: %s", err) + td.Fatalf(t, "parsing val-sep-minimum-size: %s", err) } - policy.ValueStoragePolicy.MinimumSize = int(size) + policy.ValueStoragePolicy.OverrideBlobSeparationMinimumSize = int(size) if size == 0 { - policy.ValueStoragePolicy.PolicyAdjustment = NoValueSeparation - } else if int(size) != d.opts.Experimental.ValueSeparationPolicy().MinimumSize { - policy.ValueStoragePolicy.PolicyAdjustment = OverrideValueStorage + policy.ValueStoragePolicy.DisableBlobSeparation = true } default: td.Fatalf(t, "unknown span policy arg: %s", arg) diff --git a/compaction_value_separation.go b/compaction_value_separation.go index 8d44c5c58a..a8d614e3ea 100644 --- a/compaction_value_separation.go +++ b/compaction_value_separation.go @@ -137,11 +137,10 @@ func shouldWriteBlobFiles( // blob files so values are stored according to the current policy. return true, 0 } - switch spanPolicy.ValueStoragePolicy.PolicyAdjustment { - case UseDefaultValueStorage: - // Use the global policy. - case OverrideValueStorage: - expectedMinSize = spanPolicy.ValueStoragePolicy.MinimumSize + if spanPolicy.ValueStoragePolicy.DisableBlobSeparation { + expectedMinSize = 0 + } else if spanPolicy.ValueStoragePolicy.ContainsOverrides() { + expectedMinSize = spanPolicy.ValueStoragePolicy.OverrideBlobSeparationMinimumSize if expectedMinSize == 0 { // A 0 minimum value size on the span policy indicates the field // was unset, but other parts of value separation are being @@ -149,8 +148,6 @@ func shouldWriteBlobFiles( expectedMinSize = policy.MinimumSize } expectedValSepBySuffixDisabled = spanPolicy.ValueStoragePolicy.DisableSeparationBySuffix - case NoValueSeparation: - expectedMinSize = 0 } } diff --git a/options.go b/options.go index fc7ff0c692..a24ffcea72 100644 --- a/options.go +++ b/options.go @@ -1297,24 +1297,19 @@ func (p SpanPolicy) String() string { if p.ValueStoragePolicy.DisableSeparationBySuffix { sb.WriteString("disable-value-separation-by-suffix,") } - switch p.ValueStoragePolicy.PolicyAdjustment { - case NoValueSeparation: - sb.WriteString("no-value-separation,") - case OverrideValueStorage: - sb.WriteString("override,") + if p.ValueStoragePolicy.DisableBlobSeparation { + sb.WriteString("no-blob-value-separation,") + } + if p.ValueStoragePolicy.OverrideBlobSeparationMinimumSize > 0 { + sb.WriteString("override-value-separation-min-size") } return strings.TrimSuffix(sb.String(), ",") } // ValueStoragePolicyAdjustment is used to determine where to store the values for -// KVs. If the PolicyAdjustment specified is OverrideValueStorage, the remaining fields -// are used to override the global configuration for value separation. +// KVs, overriding global policies. Values can be configured to be stored in-place, +// in value blocks, or in blob files. type ValueStoragePolicyAdjustment struct { - // PolicyAdjustment specifies the policy adjustment to apply. - PolicyAdjustment ValueStoragePolicyAdjustmentType - - // Remaining fields are ignored, unless the PolicyAdjustment is OverrideValueStorage. - // DisableSeparationBySuffix disables discriminating KVs depending on // suffix. // @@ -1322,45 +1317,43 @@ type ValueStoragePolicyAdjustment struct { // optimize access to the KV with the smallest suffix. This is useful for MVCC // keys (where the smallest suffix is the latest version), but should be // disabled for keys where the suffix does not correspond to a version. + // See sstable.IsLikelyMVCCGarbage for the exact criteria we use to + // determine whether a value is likely MVCC garbage. + // + // If separation by suffix is enabled, KVs with older suffix values will be + // written according to the following rules: + // - If the value is empty, no separation is performed. + // - If blob separation is enabled the value will be separated into a blob + // file even if its size is smaller than the minimum value size. + // - If blob separation is disabled, the value will be written to a value + // block within the sstable. DisableSeparationBySuffix bool - // MinimumSize is the minimum size of the value. - MinimumSize int -} -// ValueStoragePolicyAdjustmentType is a hint used to determine where to store the -// values for KVs. -type ValueStoragePolicyAdjustmentType uint8 + // DisableBlobSeparation disables separating values into blob files. + DisableBlobSeparation bool -const ( - // UseDefaultValueStorage is the default value; Pebble will respect global - // configuration for value separation. - UseDefaultValueStorage ValueStoragePolicyAdjustmentType = iota - - // NoValueSeparation indicates Pebble should prefer storing values - // in-place. - NoValueSeparation - - // OverrideValueStorage indicates that value separation thresholds (see - // valsep.ValueSeparationOutputConfig) for this key range are being - // overridden from a SpanPolicy. If the global Options enable value - // separation, Pebble will separate values under the OverrideValueStorage - // policy even if they do not meet the minimum size threshold of the - // global Options' ValueSeparationPolicy. - OverrideValueStorage -) + // OverrideBlobSeparationMinimumSize overrides the minimum size required + // for value separation into a blob file. Note that value separation must + // be enabled globally for this to take effect. + OverrideBlobSeparationMinimumSize int +} + +func (vsp *ValueStoragePolicyAdjustment) ContainsOverrides() bool { + return vsp.OverrideBlobSeparationMinimumSize > 0 || vsp.DisableSeparationBySuffix +} // ValueStorageLatencyTolerant is the suggested ValueStoragePolicyAdjustment // to use for key ranges that can tolerate higher value retrieval // latency. var ValueStorageLatencyTolerant = ValueStoragePolicyAdjustment{ - PolicyAdjustment: OverrideValueStorage, - MinimumSize: 10, + OverrideBlobSeparationMinimumSize: 10, } // ValueStorageLowReadLatency is the suggested ValueStoragePolicyAdjustment // to use for key ranges that require low value retrieval latency. var ValueStorageLowReadLatency = ValueStoragePolicyAdjustment{ - PolicyAdjustment: NoValueSeparation, + DisableBlobSeparation: true, + DisableSeparationBySuffix: true, } // SpanPolicyFunc is used to determine the SpanPolicy for a key region. diff --git a/sstable/properties.go b/sstable/properties.go index 30a86616c2..6b72729562 100644 --- a/sstable/properties.go +++ b/sstable/properties.go @@ -125,8 +125,9 @@ type Properties struct { // The minimum size a value must be to be separated into a blob file during writing. ValueSeparationMinSize uint64 `prop:"pebble.value-separation.min-size"` // ValueSeparationBySuffixDisabled indicates if special value separation rules were - // applied based on the KV suffix when writing. Pebble attempts to optimize writing of - // MVCC garbage values into blob files, which are recognized by the key suffix. + // applied based on the KV suffix when writing blob files. Pebble attempts to optimize + // separating MVCC garbage, which are recognized by the key suffix. Note that this + // value corresponds only for blob file writing and not value blocks. ValueSeparationBySuffixDisabled bool `prop:"pebble.value-separation.by-suffix.disabled"` // User collected properties. Currently, we only use them to store block // properties aggregated at the table level. diff --git a/testdata/compaction/mvcc_garbage_blob b/testdata/compaction/mvcc_garbage_blob index 4603da7969..6355c4b7e4 100644 --- a/testdata/compaction/mvcc_garbage_blob +++ b/testdata/compaction/mvcc_garbage_blob @@ -67,7 +67,7 @@ flush-log # Another test with the same value separation config, but this time we set a span # policy for the span y-zz that disables value separation by suffix. This should -# prevent MVCC garbage from being written to blob files. +# prevent MVCC garbage from being separeted (both into blob files and value blocks). define value-separation=(enabled, min-size=5, max-ref-depth=3, garbage-ratios=1.0:1.0) ---- @@ -127,3 +127,99 @@ pebble.compression_stats: None:234 pebble.value-separation.min-size: 5 pebble.value-separation.by-suffix.disabled: true obsolete-key: hex:00 + +define value-separation=(enabled, min-size=5, max-ref-depth=3, garbage-ratios=1.0:1.0) +---- + +set-span-policies +a,c val-sep-minimum-size=0 +---- + + +batch +set a@2 a2 +set b@5 b5 +del b@4 +set b@3 bat3 +set b@2 bat2 +del b@1 +set b@1 bat1 +set b@0 bat0 +---- + +# Although we've disabled value separation into blob files, writing to +# value blocks should still be enabled. +flush +---- +L0.0: + 000005:[a@2#10,SET-b@0#17,SET] seqnums:[10-17] points:[a@2#10,SET-b@0#17,SET] size:880 + +sstable-properties file=000005 +---- +rocksdb.num.entries: 7 +rocksdb.raw.key.size: 77 +rocksdb.raw.value.size: 20 +pebble.raw.point-tombstone.key.size: 3 +rocksdb.deleted.keys: 1 +rocksdb.num.range-deletions: 0 +pebble.value-blocks.size: 25 +rocksdb.num.data.blocks: 1 +rocksdb.comparator: pebble.internal.testkeys +rocksdb.data.size: 175 +rocksdb.filter.size: 0 +rocksdb.index.size: 36 +rocksdb.block.based.table.index.type: 0 +pebble.colblk.schema: DefaultKeySchema(pebble.internal.testkeys,16) +rocksdb.merge.operator: pebble.concatenate +rocksdb.merge.operands: 0 +pebble.num.value-blocks: 1 +pebble.num.values.in.value-blocks: 3 +rocksdb.property.collectors: [obsolete-key] +rocksdb.compression: Snappy +pebble.compression_stats: None:221 +obsolete-key: hex:00 + +batch +set a@2 a2 +set b@5 b5 +del b@4 +set b@3 bat3 +set b@2 bat2 +del b@1 +set b@1 bat1 +set b@0 bat0 +---- + +# Disabling value separation by suffix should also disable writing to value blocks. +set-span-policies +a,c val-sep-minimum-size=0 disable-separation-by-suffix +---- + +flush +---- +L0.1: + 000007:[a@2#18,SET-b@0#25,SET] seqnums:[18-25] points:[a@2#18,SET-b@0#25,SET] size:770 +L0.0: + 000005:[a@2#10,SET-b@0#17,SET] seqnums:[10-17] points:[a@2#10,SET-b@0#17,SET] size:880 + +sstable-properties file=000007 +---- +rocksdb.num.entries: 7 +rocksdb.raw.key.size: 77 +rocksdb.raw.value.size: 20 +pebble.raw.point-tombstone.key.size: 3 +rocksdb.deleted.keys: 1 +rocksdb.num.range-deletions: 0 +rocksdb.num.data.blocks: 1 +rocksdb.comparator: pebble.internal.testkeys +rocksdb.data.size: 157 +rocksdb.filter.size: 0 +rocksdb.index.size: 36 +rocksdb.block.based.table.index.type: 0 +pebble.colblk.schema: DefaultKeySchema(pebble.internal.testkeys,16) +rocksdb.merge.operator: pebble.concatenate +rocksdb.merge.operands: 0 +rocksdb.property.collectors: [obsolete-key] +rocksdb.compression: Snappy +pebble.compression_stats: None:188 +obsolete-key: hex:00 diff --git a/testdata/static_span_policy_func b/testdata/static_span_policy_func index aa46a12f04..a11939fc13 100644 --- a/testdata/static_span_policy_func +++ b/testdata/static_span_policy_func @@ -6,8 +6,8 @@ a b c d e f g a -> until d b -> until d c -> until d -d -> no-value-separation until f -e -> no-value-separation until f +d -> disable-value-separation-by-suffix,no-blob-value-separation until f +e -> disable-value-separation-by-suffix,no-blob-value-separation until f f -> none g -> none @@ -17,11 +17,11 @@ g -> none test a-z:lowlatency a b c d e z ---- -a -> no-value-separation until z -b -> no-value-separation until z -c -> no-value-separation until z -d -> no-value-separation until z -e -> no-value-separation until z +a -> disable-value-separation-by-suffix,no-blob-value-separation until z +b -> disable-value-separation-by-suffix,no-blob-value-separation until z +c -> disable-value-separation-by-suffix,no-blob-value-separation until z +d -> disable-value-separation-by-suffix,no-blob-value-separation until z +e -> disable-value-separation-by-suffix,no-blob-value-separation until z z -> none # Abutting policies. @@ -30,12 +30,12 @@ test b-d:lowlatency d-f:latencytolerant f-h:lowlatency a b c d e f g h i z ---- a -> until b -b -> no-value-separation until d -c -> no-value-separation until d -d -> override until f -e -> override until f -f -> no-value-separation until h -g -> no-value-separation until h +b -> disable-value-separation-by-suffix,no-blob-value-separation until d +c -> disable-value-separation-by-suffix,no-blob-value-separation until d +d -> override-value-separation-min-size until f +e -> override-value-separation-min-size until f +f -> disable-value-separation-by-suffix,no-blob-value-separation until h +g -> disable-value-separation-by-suffix,no-blob-value-separation until h h -> none i -> none z -> none @@ -46,14 +46,14 @@ test b-d:lowlatency h-j:latencytolerant a b c d e f g h i j k l ---- a -> until b -b -> no-value-separation until d -c -> no-value-separation until d +b -> disable-value-separation-by-suffix,no-blob-value-separation until d +c -> disable-value-separation-by-suffix,no-blob-value-separation until d d -> until h e -> until h f -> until h g -> until h -h -> override until j -i -> override until j +h -> override-value-separation-min-size until j +i -> override-value-separation-min-size until j j -> none k -> none l -> none diff --git a/valsep/value_separator.go b/valsep/value_separator.go index 2e0ed95744..b33242002c 100644 --- a/valsep/value_separator.go +++ b/valsep/value_separator.go @@ -163,8 +163,7 @@ func NewWriteNewBlobFiles( // SetNextOutputConfig implements the ValueSeparation interface. func (vs *ValueSeparator) SetNextOutputConfig(config ValueSeparationOutputConfig) { if config.MinimumSize == 0 { - // This indicates that MinimumSize was unset, so fall back - // to the global minimum size. + // No override, so fall back to the global minimum size. config.MinimumSize = vs.globalConfig.MinimumSize } vs.currentConfig = config