Skip to content

Commit 8d88aaf

Browse files
committed
storage: aggressively separate values in range ID keyspace
Adapt Cockroach's implementation of pebble.SpanPolicyFunc to mark the range ID keyspace with ValueStorageLatencyTolerant. This setting will cause Pebble to separate values into blob files as long as they're large enough that a pointer to an external value is likely to be smaller than the value itself. Separating values improves write amplification by avoiding rewriting values during some compactions.

We target the range ID keyspace because it contains the raft log, which is written during all user writes but rarely read.

Tangentially related to #16624.

Epic: none

Release note (performance improvement): Reduces write amplification by storing raft log values in separate blob files. This reduces write bandwidth, especially on stores with many replicas. This in turn can increase throughput and reduce latency. This behavior is active as long as the storage.value_separation.enabled cluster setting is enabled.
1 parent c994ab7 commit 8d88aaf

File tree

2 files changed

+113
-22
lines changed

2 files changed

+113
-22
lines changed

pkg/storage/pebble.go

Lines changed: 50 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -549,28 +549,7 @@ func DefaultPebbleOptions() *pebble.Options {
549549
opts.TargetByteDeletionRate = 128 << 20 // 128 MB
550550
opts.Experimental.ShortAttributeExtractor = shortAttributeExtractorForValues
551551

552-
lockTableStartKey := EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeLockTablePrefix})
553-
lockTableEndKey := EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeLockTablePrefix.PrefixEnd()})
554-
localEndKey := EncodeMVCCKey(MVCCKey{Key: keys.LocalPrefix.PrefixEnd()})
555-
opts.Experimental.SpanPolicyFunc = func(startKey []byte) (policy pebble.SpanPolicy, endKey []byte, _ error) {
556-
if !bytes.HasPrefix(startKey, keys.LocalPrefix) {
557-
return pebble.SpanPolicy{}, nil, nil
558-
}
559-
// Prefer fast compression for all local keys, since they shouldn't take up
560-
// a significant part of the space.
561-
policy.PreferFastCompression = true
562-
563-
// We also disable value separation for lock keys.
564-
if cockroachkvs.Compare(startKey, lockTableEndKey) >= 0 {
565-
return policy, localEndKey, nil
566-
}
567-
if cockroachkvs.Compare(startKey, lockTableStartKey) < 0 {
568-
return policy, lockTableStartKey, nil
569-
}
570-
policy.DisableValueSeparationBySuffix = true
571-
policy.ValueStoragePolicy = pebble.ValueStorageLowReadLatency
572-
return policy, lockTableEndKey, nil
573-
}
552+
opts.Experimental.SpanPolicyFunc = spanPolicyFunc
574553
opts.Experimental.UserKeyCategories = userKeyCategories
575554

576555
opts.Levels[0] = pebble.LevelOptions{
@@ -607,6 +586,55 @@ func DefaultPebbleOptions() *pebble.Options {
607586
return opts
608587
}
609588

589+
var (
590+
spanPolicyLocalRangeIDEndKey = EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd()})
591+
spanPolicyLockTableStartKey = EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeLockTablePrefix})
592+
spanPolicyLockTableEndKey = EncodeMVCCKey(MVCCKey{Key: keys.LocalRangeLockTablePrefix.PrefixEnd()})
593+
spanPolicyLocalEndKey = EncodeMVCCKey(MVCCKey{Key: keys.LocalPrefix.PrefixEnd()})
594+
)
595+
596+
// spanPolicyFunc is a pebble.SpanPolicyFunc that applies special policies for
597+
// the CockroachDB keyspace.
598+
func spanPolicyFunc(startKey []byte) (policy pebble.SpanPolicy, endKey []byte, _ error) {
599+
// There's no special policy for non-local keys.
600+
if !bytes.HasPrefix(startKey, keys.LocalPrefix) {
601+
return pebble.SpanPolicy{}, nil, nil
602+
}
603+
// Prefer fast compression for all local keys, since they shouldn't take up
604+
// a significant part of the space.
605+
policy.PreferFastCompression = true
606+
607+
// The first section of the local keyspace is the Range-ID keyspace. It
608+
// extends from the beginning of the keyspace to the Range Local keys. The
609+
// Range-ID keyspace includes the raft log, which is rarely read and
610+
// receives ~half the writes.
611+
if cockroachkvs.Compare(startKey, spanPolicyLocalRangeIDEndKey) < 0 {
612+
if !bytes.HasPrefix(startKey, keys.LocalRangeIDPrefix) {
613+
return pebble.SpanPolicy{}, nil, errors.AssertionFailedf("startKey %s is not a Range-ID key", startKey)
614+
}
615+
policy.ValueStoragePolicy = pebble.ValueStorageLatencyTolerant
616+
return policy, spanPolicyLocalRangeIDEndKey, nil
617+
}
618+
619+
// We also disable value separation for lock keys.
620+
if cockroachkvs.Compare(startKey, spanPolicyLockTableEndKey) >= 0 {
621+
// Not a lock key, so use default value separation within sstable (by
622+
// suffix) and into blob files.
623+
// NB: there won't actually be a suffix in these local keys.
624+
return policy, spanPolicyLocalEndKey, nil
625+
}
626+
if cockroachkvs.Compare(startKey, spanPolicyLockTableStartKey) < 0 {
627+
// Not a lock key, so use default value separation within sstable (by
628+
// suffix) and into blob files.
629+
// NB: there won't actually be a suffix in these local keys.
630+
return policy, spanPolicyLockTableStartKey, nil
631+
}
632+
// Lock key. Disable value separation.
633+
policy.DisableValueSeparationBySuffix = true
634+
policy.ValueStoragePolicy = pebble.ValueStorageLowReadLatency
635+
return policy, spanPolicyLockTableEndKey, nil
636+
}
637+
610638
func shortAttributeExtractorForValues(
611639
key []byte, keyPrefixLen int, value []byte,
612640
) (pebble.ShortAttribute, error) {

pkg/storage/pebble_test.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1762,3 +1762,66 @@ func TestPebbleCompactCancellation(t *testing.T) {
17621762
bfs.WaitForBlockAndUnblock()
17631763
wg.Wait()
17641764
}
1765+
1766+
func TestPebbleSpanPolicyFunc(t *testing.T) {
1767+
defer leaktest.AfterTest(t)()
1768+
defer log.Scope(t).Close(t)
1769+
1770+
type testCase struct {
1771+
startKey roachpb.Key
1772+
wantPolicy pebble.SpanPolicy
1773+
wantEndKey []byte
1774+
}
1775+
cases := []testCase{
1776+
{
1777+
startKey: keys.RaftHardStateKey(1),
1778+
wantPolicy: pebble.SpanPolicy{
1779+
PreferFastCompression: true,
1780+
ValueStoragePolicy: pebble.ValueStorageLatencyTolerant,
1781+
},
1782+
wantEndKey: spanPolicyLocalRangeIDEndKey,
1783+
},
1784+
{
1785+
startKey: keys.RaftLogKey(9, 2),
1786+
wantPolicy: pebble.SpanPolicy{
1787+
PreferFastCompression: true,
1788+
ValueStoragePolicy: pebble.ValueStorageLatencyTolerant,
1789+
},
1790+
wantEndKey: spanPolicyLocalRangeIDEndKey,
1791+
},
1792+
{
1793+
startKey: keys.RangeDescriptorKey(roachpb.RKey("a")),
1794+
wantPolicy: pebble.SpanPolicy{
1795+
PreferFastCompression: true,
1796+
},
1797+
wantEndKey: spanPolicyLockTableStartKey,
1798+
},
1799+
{
1800+
startKey: func() roachpb.Key {
1801+
k, _ := keys.LockTableSingleKey(roachpb.Key("a"), nil)
1802+
return k
1803+
}(),
1804+
wantPolicy: pebble.SpanPolicy{
1805+
PreferFastCompression: true,
1806+
DisableValueSeparationBySuffix: true,
1807+
ValueStoragePolicy: pebble.ValueStorageLowReadLatency,
1808+
},
1809+
wantEndKey: spanPolicyLockTableEndKey,
1810+
},
1811+
{
1812+
startKey: keys.SystemSQLCodec.IndexPrefix(1, 2),
1813+
wantPolicy: pebble.SpanPolicy{},
1814+
wantEndKey: nil,
1815+
},
1816+
}
1817+
1818+
for _, tc := range cases {
1819+
t.Run(fmt.Sprintf("%x", tc.startKey), func(t *testing.T) {
1820+
ek := EngineKey{Key: tc.startKey}.Encode()
1821+
policy, endKey, err := spanPolicyFunc(ek)
1822+
require.NoError(t, err)
1823+
require.Equal(t, tc.wantPolicy, policy)
1824+
require.Equal(t, tc.wantEndKey, endKey)
1825+
})
1826+
}
1827+
}

0 commit comments

Comments
 (0)