Skip to content

Commit e401883

Browse files
committed
storage: add configurable latency tolerant min, mvcc garbage min
This patch adds two new, undocumented cluster settings that configure the minimum size a value must have before it is separated into a blob file: one for values of latency-tolerant keys, and one for values that are likely MVCC garbage.

Epic: none

Release note: None
1 parent 3a39245 commit e401883

File tree

3 files changed

+80
-41
lines changed

3 files changed

+80
-41
lines changed

pkg/storage/open.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ func Open(
560560
var cfg engineConfig
561561
cfg.env = env
562562
cfg.settings = settings
563-
cfg.opts = DefaultPebbleOptions()
563+
cfg.opts = DefaultPebbleOptionsForOpen(&cfg.settings.SV)
564564
cfg.opts.FS = env
565565
cfg.opts.ReadOnly = env.IsReadOnly()
566566
for _, opt := range opts {

pkg/storage/pebble.go

Lines changed: 78 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,24 @@ var (
538538
1 /* min */, 80 /* max */)),
539539
settings.IntInRange(1, 100),
540540
)
541+
valueSeparationLatencyTolerantMinimumSize = settings.RegisterIntSetting(
542+
settings.SystemVisible,
543+
"storage.value_separation.latency_tolerant_minimum_size",
544+
"the minimum size of a value that will be separated into a blob file given the value is "+
545+
"latency tolerant (in the range local keyspace)",
546+
int64(metamorphic.ConstantWithTestRange("storage.value_separation.latency_tolerant_minimum_size",
547+
32 /* 32 bytes (default) */, 25 /* 25 bytes (minimum) */, 512 /* 512 bytes (maximum) */)),
548+
settings.IntWithMinimum(1),
549+
)
550+
valueSeparationMVCCGarbageMinimumSize = settings.RegisterIntSetting(
551+
settings.SystemVisible,
552+
"storage.value_separation.mvcc_history_minimum_size",
553+
"the minimum size of a value that will be separated into a blob file given the value is "+
554+
"likely not the latest version of a key",
555+
int64(metamorphic.ConstantWithTestRange("storage.value_separation.mvcc_history_minimum_size",
556+
32 /* 32 bytes (default) */, 25 /* 25 bytes (minimum) */, 512 /* 512 bytes (maximum) */)),
557+
settings.IntWithMinimum(1),
558+
)
541559
)
542560

543561
// This setting controls deletion pacing. This helps prevent disk slowness
@@ -614,8 +632,7 @@ const DefaultMemtableSize = 64 << 20 // 64 MB
614632

615633
const mvccWallTimeIntervalCollector = "MVCCTimeInterval"
616634

617-
// DefaultPebbleOptions returns the default pebble options.
618-
func DefaultPebbleOptions() *pebble.Options {
635+
func defaultPebbleOptions(sv *settings.Values) *pebble.Options {
619636
opts := &pebble.Options{
620637
Comparer: &EngineComparer,
621638
FS: vfs.Default,
@@ -642,7 +659,7 @@ func DefaultPebbleOptions() *pebble.Options {
642659
opts.FlushDelayRangeKey = 10 * time.Second
643660
opts.Experimental.ShortAttributeExtractor = shortAttributeExtractorForValues
644661

645-
opts.Experimental.SpanPolicyFunc = spanPolicyFunc
662+
opts.Experimental.SpanPolicyFunc = spanPolicyFuncFactory(sv)
646663
opts.Experimental.UserKeyCategories = userKeyCategories
647664

648665
// Every 5 minutes, log iterators that have been open for more than 1 minute.
@@ -683,52 +700,74 @@ func DefaultPebbleOptions() *pebble.Options {
683700
return opts
684701
}
685702

703+
// DefaultPebbleOptions returns the default pebble options for general use
704+
// (e.g., SST writers, external iterators, tests). This does not use cluster
705+
// settings and should not be used when opening a production Pebble engine.
706+
func DefaultPebbleOptions() *pebble.Options {
707+
return defaultPebbleOptions(nil /* sv */)
708+
}
709+
710+
// DefaultPebbleOptionsForOpen returns the default pebble options for opening
711+
// a production Pebble engine. It uses cluster settings to configure value
712+
// storage policies.
713+
func DefaultPebbleOptionsForOpen(sv *settings.Values) *pebble.Options {
714+
return defaultPebbleOptions(sv)
715+
}
716+
686717
var (
687718
spanPolicyLocalRangeIDEndKey = EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd()})
688719
spanPolicyLockTableStartKey = EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeLockTablePrefix})
689720
spanPolicyLockTableEndKey = EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeLockTablePrefix.PrefixEnd()})
690721
spanPolicyLocalEndKey = EncodeMVCCKey(MVCCKey{Key: keys.LocalPrefix.PrefixEnd()})
691722
)
692723

693-
// spanPolicyFunc is a pebble.SpanPolicyFunc that applies special policies for
724+
// spanPolicyFuncFactory returns a pebble.SpanPolicyFunc that applies special policies for
694725
// the CockroachDB keyspace.
695-
func spanPolicyFunc(startKey []byte) (policy pebble.SpanPolicy, endKey []byte, _ error) {
696-
// There's no special policy for non-local keys.
697-
if !bytes.HasPrefix(startKey, keys.LocalPrefix) {
698-
return pebble.SpanPolicy{}, nil, nil
699-
}
700-
// Prefer fast compression for all local keys, since they shouldn't take up
701-
// a significant part of the space.
702-
policy.PreferFastCompression = true
703-
704-
// The first section of the local keyspace is the Range-ID keyspace. It
705-
// extends from the beginning of the keyspace to the Range Local keys. The
706-
// Range-ID keyspace includes the raft log, which is rarely read and
707-
// receives ~half the writes.
708-
if cockroachkvs.Compare(startKey, spanPolicyLocalRangeIDEndKey) < 0 {
709-
if !bytes.HasPrefix(startKey, keys.LocalRangeIDPrefix) {
710-
return pebble.SpanPolicy{}, nil, errors.AssertionFailedf("startKey %s is not a Range-ID key", startKey)
726+
func spanPolicyFuncFactory(sv *settings.Values) func([]byte) (pebble.SpanPolicy, []byte, error) {
727+
return func(startKey []byte) (policy pebble.SpanPolicy, endKey []byte, _ error) {
728+
// There's no special policy for non-local keys.
729+
if !bytes.HasPrefix(startKey, keys.LocalPrefix) {
730+
return pebble.SpanPolicy{}, nil, nil
731+
}
732+
// Prefer fast compression for all local keys, since they shouldn't take up
733+
// a significant part of the space.
734+
policy.PreferFastCompression = true
735+
736+
// The first section of the local keyspace is the Range-ID keyspace. It
737+
// extends from the beginning of the keyspace to the Range Local keys. The
738+
// Range-ID keyspace includes the raft log, which is rarely read and
739+
// receives ~half the writes.
740+
if cockroachkvs.Compare(startKey, spanPolicyLocalRangeIDEndKey) < 0 {
741+
if !bytes.HasPrefix(startKey, keys.LocalRangeIDPrefix) {
742+
return pebble.SpanPolicy{}, nil, errors.AssertionFailedf("startKey %s is not a Range-ID key", startKey)
743+
}
744+
if sv != nil {
745+
policy.ValueStoragePolicy = pebble.ValueStoragePolicyAdjustment{
746+
OverrideBlobSeparationMinimumSize: int(valueSeparationLatencyTolerantMinimumSize.Get(sv)),
747+
}
748+
} else {
749+
policy.ValueStoragePolicy = pebble.ValueStorageLatencyTolerant
750+
}
751+
return policy, spanPolicyLocalRangeIDEndKey, nil
711752
}
712-
policy.ValueStoragePolicy = pebble.ValueStorageLatencyTolerant
713-
return policy, spanPolicyLocalRangeIDEndKey, nil
714-
}
715753

716-
// We also disable value separation for lock keys.
717-
if cockroachkvs.Compare(startKey, spanPolicyLockTableEndKey) >= 0 {
718-
// Not a lock key, so use default value separation within sstable (by
719-
// suffix) and into blob files.
720-
// NB: there won't actually be a suffix in these local keys.
721-
return policy, spanPolicyLocalEndKey, nil
722-
}
723-
if cockroachkvs.Compare(startKey, spanPolicyLockTableStartKey) < 0 {
724-
// Not a lock key, so use default value separation within sstable (by
725-
// suffix) and into blob files.
726-
// NB: there won't actually be a suffix in these local keys.
727-
return policy, spanPolicyLockTableStartKey, nil
754+
// We also disable value separation for lock keys.
755+
if cockroachkvs.Compare(startKey, spanPolicyLockTableEndKey) >= 0 {
756+
// Not a lock key, so use default value separation within sstable (by
757+
// suffix) and into blob files.
758+
// NB: there won't actually be a suffix in these local keys.
759+
return policy, spanPolicyLocalEndKey, nil
760+
}
761+
if cockroachkvs.Compare(startKey, spanPolicyLockTableStartKey) < 0 {
762+
// Not a lock key, so use default value separation within sstable (by
763+
// suffix) and into blob files.
764+
// NB: there won't actually be a suffix in these local keys.
765+
return policy, spanPolicyLockTableStartKey, nil
766+
}
767+
// Lock key. Disable value separation.
768+
policy.ValueStoragePolicy = pebble.ValueStorageLowReadLatency
769+
return policy, spanPolicyLockTableEndKey, nil
728770
}
729-
// Lock key. Disable value separation.
730-
policy.ValueStoragePolicy = pebble.ValueStorageLowReadLatency
731-
return policy, spanPolicyLockTableEndKey, nil
732771
}
733772

734773
func shortAttributeExtractorForValues(
@@ -1060,7 +1099,7 @@ func newPebble(ctx context.Context, cfg engineConfig) (p *Pebble, err error) {
10601099
return pebble.ValueSeparationPolicy{
10611100
Enabled: true,
10621101
MinimumSize: int(valueSeparationMinimumSize.Get(&cfg.settings.SV)),
1063-
MinimumMVCCGarbageSize: 32,
1102+
MinimumMVCCGarbageSize: int(valueSeparationMVCCGarbageMinimumSize.Get(&cfg.settings.SV)),
10641103
MaxBlobReferenceDepth: int(valueSeparationMaxReferenceDepth.Get(&cfg.settings.SV)),
10651104
RewriteMinimumAge: valueSeparationRewriteMinimumAge.Get(&cfg.settings.SV),
10661105
GarbageRatioLowPriority: lowPri,

pkg/storage/pebble_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1903,7 +1903,7 @@ func TestPebbleSpanPolicyFunc(t *testing.T) {
19031903
for _, tc := range cases {
19041904
t.Run(fmt.Sprintf("%x", tc.startKey), func(t *testing.T) {
19051905
ek := EngineKey{Key: tc.startKey}.Encode()
1906-
policy, endKey, err := spanPolicyFunc(ek)
1906+
policy, endKey, err := spanPolicyFuncFactory(nil /* sv */)(ek)
19071907
require.NoError(t, err)
19081908
require.Equal(t, tc.wantPolicy, policy)
19091909
require.Equal(t, tc.wantEndKey, endKey)

0 commit comments

Comments
 (0)