Skip to content

Commit 653b029

Browse files
committed
db: min size for mvcc separation should be configurable
We will add a field, `MinimumMVCCGarbageSize`, that specifies the minimum value size required for likely MVCC garbage to be eligible for separation. Fixes: #5377
1 parent 7b8c50b commit 653b029

20 files changed

+176
-110
lines changed

compaction.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3476,14 +3476,15 @@ func (d *DB) compactAndWrite(
34763476
writerOpts.Compression = block.FastestCompression
34773477
}
34783478
vSep := valueSeparation
3479-
switch spanPolicy.ValueStoragePolicy {
3480-
case ValueStorageLowReadLatency:
3479+
switch spanPolicy.ValueStoragePolicy.PolicyAdjustment {
3480+
case NoValueSeparation:
34813481
vSep = compact.NeverSeparateValues{}
3482-
case ValueStorageLatencyTolerant:
3482+
case Override:
34833483
// This span of keyspace is more tolerant of latency, so set a more
34843484
// aggressive value separation policy for this output.
34853485
vSep.SetNextOutputConfig(compact.ValueSeparationOutputConfig{
3486-
MinimumSize: latencyTolerantMinimumSize,
3486+
MinimumSize: spanPolicy.ValueStoragePolicy.MinimumSize,
3487+
MinimumMVCCGarbageSize: spanPolicy.ValueStoragePolicy.MinimumMVCCGarbageSize,
34873488
})
34883489
}
34893490
objMeta, tw, err := d.newCompactionOutputTable(jobID, c, writerOpts)

data_test.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1783,21 +1783,27 @@ func parseDBOptionsArgs(opts *Options, args []datadriven.CmdArg) error {
17831783
}
17841784
policy := SpanPolicy{
17851785
DisableValueSeparationBySuffix: true,
1786-
ValueStoragePolicy: ValueStorageLowReadLatency,
1786+
ValueStoragePolicy: ValueStoragePolicy{
1787+
PolicyAdjustment: NoValueSeparation,
1788+
},
17871789
}
17881790
spanPolicies = append(spanPolicies, SpanAndPolicy{
17891791
KeyRange: span,
17901792
Policy: policy,
17911793
})
1792-
case "latency-tolerant-span":
1794+
case "override-span":
17931795
if len(cmdArg.Vals) != 2 {
1794-
return errors.New("latency-tolerant-span expects 2 arguments: <start-key> <end-key>")
1796+
return errors.New("override-span expects 2 arguments: <start-key> <end-key>")
17951797
}
17961798
span := KeyRange{
17971799
Start: []byte(cmdArg.Vals[0]),
17981800
End: []byte(cmdArg.Vals[1]),
17991801
}
1800-
policy := SpanPolicy{ValueStoragePolicy: ValueStorageLatencyTolerant}
1802+
policy := SpanPolicy{ValueStoragePolicy: ValueStoragePolicy{
1803+
PolicyAdjustment: Override,
1804+
MinimumSize: latencyTolerantMinimumSize,
1805+
MinimumMVCCGarbageSize: 1,
1806+
}}
18011807
spanPolicies = append(spanPolicies, SpanAndPolicy{
18021808
KeyRange: span,
18031809
Policy: policy,
@@ -1843,6 +1849,9 @@ func parseDBOptionsArgs(opts *Options, args []datadriven.CmdArg) error {
18431849
switch name {
18441850
case "enabled", "disabled":
18451851
policy.Enabled = name == "enabled"
1852+
if policy.Enabled {
1853+
policy.MinimumSize = latencyTolerantMinimumSize
1854+
}
18461855
case "min-size":
18471856
policy.MinimumSize, err = strconv.Atoi(value)
18481857
if err != nil {
@@ -1871,6 +1880,11 @@ func parseDBOptionsArgs(opts *Options, args []datadriven.CmdArg) error {
18711880
if err != nil {
18721881
return err
18731882
}
1883+
case "min-mvcc-garbage-size":
1884+
policy.MinimumMVCCGarbageSize, err = strconv.Atoi(value)
1885+
if err != nil {
1886+
return err
1887+
}
18741888
default:
18751889
return errors.Newf("unrecognized value-separation argument %q", name)
18761890
}

event_listener_test.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -633,9 +633,10 @@ func TestBlobCorruptionEvent(t *testing.T) {
633633
}
634634
opts.Experimental.ValueSeparationPolicy = func() ValueSeparationPolicy {
635635
return ValueSeparationPolicy{
636-
Enabled: true,
637-
MinimumSize: 1,
638-
MaxBlobReferenceDepth: 10,
636+
Enabled: true,
637+
MinimumSize: 1,
638+
MinimumMVCCGarbageSize: 1,
639+
MaxBlobReferenceDepth: 10,
639640
}
640641
}
641642
d, err := Open("", opts)

internal/compact/run.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@ type ValueSeparationOutputConfig struct {
115115
// MinimumSize is the minimum size of a value that will be separated into a
116116
// blob file.
117117
MinimumSize int
118+
// MinimumMVCCGarbageSize is the minimum size of a value that will be
119+
// separated into a blob file if the value is likely MVCC garbage.
120+
MinimumMVCCGarbageSize int
118121
}
119122

120123
// ValueSeparation defines an interface for writing some values to separate blob
@@ -344,7 +347,7 @@ func (r *Runner) writeKeysToTable(
344347
}
345348

346349
valueLen := kv.V.Len()
347-
isLikelyMVCCGarbage := sstable.IsLikelyMVCCGarbage(kv.K.UserKey, prevKeyKind, kv.K.Kind(), valueLen, prefixEqual)
350+
isLikelyMVCCGarbage := sstable.IsLikelyMVCCGarbage(kv.K.UserKey, prevKeyKind, kv.K.Kind(), prefixEqual)
348351
// Add the value to the sstable, possibly separating its value into a
349352
// blob file. The ValueSeparation implementation is responsible for
350353
// writing the KV to the sstable.

metamorphic/options.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ func defaultOptions(kf KeyFormat) *pebble.Options {
341341
return pebble.ValueSeparationPolicy{
342342
Enabled: true,
343343
MinimumSize: 5,
344+
MinimumMVCCGarbageSize: 1,
344345
MaxBlobReferenceDepth: 3,
345346
RewriteMinimumAge: 50 * time.Millisecond,
346347
GarbageRatioLowPriority: 0.10, // 10% garbage
@@ -926,6 +927,7 @@ func RandomOptions(rng *rand.Rand, kf KeyFormat, cfg RandomOptionsCfg) *TestOpti
926927
policy := pebble.ValueSeparationPolicy{
927928
Enabled: true,
928929
MinimumSize: 1 + rng.IntN(maxValueSize),
930+
MinimumMVCCGarbageSize: 1 + rng.IntN(9),
929931
MaxBlobReferenceDepth: 2 + rng.IntN(9), // 2-10
930932
RewriteMinimumAge: time.Duration(rng.IntN(90)+10) * time.Millisecond, // [10ms, 100ms)
931933
GarbageRatioLowPriority: lowPri,

metrics_test.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,10 @@ func TestMetrics(t *testing.T) {
254254
opts.Experimental.EnableValueBlocks = func() bool { return true }
255255
opts.Experimental.ValueSeparationPolicy = func() ValueSeparationPolicy {
256256
return ValueSeparationPolicy{
257-
Enabled: true,
258-
MinimumSize: 3,
259-
MaxBlobReferenceDepth: 5,
257+
Enabled: true,
258+
MinimumSize: 3,
259+
MinimumMVCCGarbageSize: 1,
260+
MaxBlobReferenceDepth: 5,
260261
}
261262
}
262263
opts.TargetFileSizes[0] = 50

options.go

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,6 +1238,13 @@ type ValueSeparationPolicy struct {
12381238
//
12391239
// MinimumSize must be > 0.
12401240
MinimumSize int
1241+
// MinimumMVCCGarbageSize specifies the minimum size of a value that can be
1242+
// separated into a blob file if said value is likely MVCC garbage. This
1243+
// applies only to SpanPolicies that permit separation of MVCC garbage,
1244+
// which is also the default.
1245+
//
1246+
// MinimumMVCCGarbageSize must be > 0.
1247+
MinimumMVCCGarbageSize int
12411248
// MaxBlobReferenceDepth limits the number of potentially overlapping (in
12421249
// the keyspace) blob files that can be referenced by a single sstable. If a
12431250
// compaction may produce an output sstable referencing more than this many
@@ -1311,36 +1318,51 @@ func (p SpanPolicy) String() string {
13111318
if p.DisableValueSeparationBySuffix {
13121319
sb.WriteString("disable-value-separation-by-suffix,")
13131320
}
1314-
switch p.ValueStoragePolicy {
1315-
case ValueStorageLowReadLatency:
1316-
sb.WriteString("low-read-latency,")
1317-
case ValueStorageLatencyTolerant:
1318-
sb.WriteString("latency-tolerant,")
1321+
switch p.ValueStoragePolicy.PolicyAdjustment {
1322+
case NoValueSeparation:
1323+
sb.WriteString("no-value-separation,")
1324+
case Override:
1325+
sb.WriteString("override,")
13191326
}
13201327
return strings.TrimSuffix(sb.String(), ",")
13211328
}
13221329

1323-
// ValueStoragePolicy is a hint used to determine where to store the values for
1324-
// KVs.
1325-
type ValueStoragePolicy uint8
1330+
// ValueStoragePolicy is used to determine where to store the values for
1331+
// KVs. If the PolicyAdjustment specified is Override, the remaining fields
1332+
// are used to override the global configuration for value separation.
1333+
type ValueStoragePolicy struct {
1334+
// PolicyAdjustment specifies the policy adjustment to apply.
1335+
PolicyAdjustment ValueStoragePolicyAdjustment
1336+
// Remaining fields are ignored, unless the PolicyAdjustment is Override.
1337+
1338+
// MinimumSize is the minimum size of a value that will be separated into a blob file.
1339+
MinimumSize int
1340+
// MinimumMVCCGarbageSize is the minimum size of a value that will be
1341+
// separated into a blob file if the value is likely MVCC garbage.
1342+
MinimumMVCCGarbageSize int
1343+
}
1344+
1345+
// ValueStoragePolicyAdjustment is a hint used to determine where to store the
1346+
// values for KVs.
1347+
type ValueStoragePolicyAdjustment uint8
13261348

13271349
const (
1328-
// ValueStorageDefault is the default value; Pebble will respect global
1329-
// configuration for value blocks and value separation.
1330-
ValueStorageDefault ValueStoragePolicy = iota
1350+
// UseDefault is the default value; Pebble will respect global
1351+
// configuration for value separation.
1352+
UseDefault ValueStoragePolicyAdjustment = iota
13311353

1332-
// ValueStorageLowReadLatency indicates Pebble should prefer storing values
1354+
// NoValueSeparation indicates Pebble should prefer storing values
13331355
// in-place.
1334-
ValueStorageLowReadLatency
1356+
NoValueSeparation
13351357

1336-
// ValueStorageLatencyTolerant indicates value retrieval can tolerate
1358+
// Override indicates value retrieval can tolerate
13371359
// additional latency, so Pebble should aggressively prefer storing values
13381360
// separately if it can reduce write amplification.
13391361
//
13401362
// If the global Options enable value separation, Pebble may choose to
13411363
// separate values under the Override policy even if they do not meet
13421364
// the minimum size threshold of the global Options' ValueSeparationPolicy.
1343-
ValueStorageLatencyTolerant
1365+
Override
13441366
)
13451367

13461368
// SpanPolicyFunc is used to determine the SpanPolicy for a key region.
@@ -1855,6 +1877,7 @@ func (o *Options) String() string {
18551877
fmt.Fprintln(&buf, "[Value Separation]")
18561878
fmt.Fprintf(&buf, " enabled=%t\n", policy.Enabled)
18571879
fmt.Fprintf(&buf, " minimum_size=%d\n", policy.MinimumSize)
1880+
fmt.Fprintf(&buf, " minimum_mvcc_garbage_size=%d\n", policy.MinimumMVCCGarbageSize)
18581881
fmt.Fprintf(&buf, " max_blob_reference_depth=%d\n", policy.MaxBlobReferenceDepth)
18591882
fmt.Fprintf(&buf, " rewrite_minimum_age=%s\n", policy.RewriteMinimumAge)
18601883
fmt.Fprintf(&buf, " garbage_ratio_low_priority=%.2f\n", policy.GarbageRatioLowPriority)
@@ -2300,6 +2323,10 @@ func (o *Options) Parse(s string, hooks *ParseHooks) error {
23002323
var minimumSize int
23012324
minimumSize, err = strconv.Atoi(value)
23022325
valSepPolicy.MinimumSize = minimumSize
2326+
case "minimum_mvcc_garbage_size":
2327+
var minimumMVCCGarbageSize int
2328+
minimumMVCCGarbageSize, err = strconv.Atoi(value)
2329+
valSepPolicy.MinimumMVCCGarbageSize = minimumMVCCGarbageSize
23032330
case "max_blob_reference_depth":
23042331
valSepPolicy.MaxBlobReferenceDepth, err = strconv.Atoi(value)
23052332
case "rewrite_minimum_age":
@@ -2571,6 +2598,9 @@ func (o *Options) Validate() error {
25712598
if policy.MinimumSize <= 0 {
25722599
fmt.Fprintf(&buf, "ValueSeparationPolicy.MinimumSize (%d) must be > 0\n", policy.MinimumSize)
25732600
}
2601+
if policy.MinimumMVCCGarbageSize <= 0 {
2602+
fmt.Fprintf(&buf, "ValueSeparationPolicy.MinimumMVCCGarbageSize (%d) must be > 0\n", policy.MinimumMVCCGarbageSize)
2603+
}
25742604
if policy.MaxBlobReferenceDepth <= 0 {
25752605
fmt.Fprintf(&buf, "ValueSeparationPolicy.MaxBlobReferenceDepth (%d) must be > 0\n", policy.MaxBlobReferenceDepth)
25762606
}

options_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,10 @@ func (o *Options) randomizeForTesting(t testing.TB) {
4242
if o.FormatMajorVersion >= FormatValueSeparation && o.Experimental.ValueSeparationPolicy == nil && rand.Int64N(4) > 0 {
4343
lowPri := 0.1 + rand.Float64()*0.9 // [0.1, 1.0)
4444
policy := ValueSeparationPolicy{
45-
Enabled: true,
46-
MinimumSize: 1 << rand.IntN(10), // [1, 512]
47-
MaxBlobReferenceDepth: rand.IntN(10) + 1, // [1, 10)
45+
Enabled: true,
46+
MinimumSize: 1 << rand.IntN(10), // [1, 512]
47+
MinimumMVCCGarbageSize: rand.IntN(10) + 1, // [1, 10]
48+
MaxBlobReferenceDepth: rand.IntN(10) + 1, // [1, 10]
4849
// Constrain the rewrite minimum age to [0, 15s).
4950
RewriteMinimumAge: time.Duration(rand.IntN(15)) * time.Second,
5051
GarbageRatioLowPriority: lowPri,
@@ -259,7 +260,6 @@ func TestOptionsCheckCompatibility(t *testing.T) {
259260

260261
// Check that an OPTIONS file that configured an explicit WALDir that will
261262
// no longer be used errors if it's not also present in WALRecoveryDirs.
262-
//require.Equal(t, ErrMissingWALRecoveryDir{Dir: "external-wal-dir"},
263263
err := DefaultOptions().CheckCompatibility(storeDir, `
264264
[Options]
265265
wal_dir=external-wal-dir
@@ -648,10 +648,10 @@ func TestStaticSpanPolicyFunc(t *testing.T) {
648648
sap.KeyRange.End = []byte(p.Next())
649649
p.Expect(":")
650650
switch tok := p.Next(); tok {
651-
case "lowlatency":
652-
sap.Policy.ValueStoragePolicy = ValueStorageLowReadLatency
653-
case "latencytolerant":
654-
sap.Policy.ValueStoragePolicy = ValueStorageLatencyTolerant
651+
case "novalueseparation":
652+
sap.Policy.ValueStoragePolicy.PolicyAdjustment = NoValueSeparation
653+
case "override":
654+
sap.Policy.ValueStoragePolicy.PolicyAdjustment = Override
655655
default:
656656
t.Fatalf("unknown policy: %s", tok)
657657
}

replay/replay_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -350,10 +350,11 @@ func collectCorpus(t *testing.T, fs *vfs.MemFS, name string) {
350350
}
351351
opts.Experimental.ValueSeparationPolicy = func() pebble.ValueSeparationPolicy {
352352
return pebble.ValueSeparationPolicy{
353-
Enabled: true,
354-
MinimumSize: 3,
355-
MaxBlobReferenceDepth: 5,
356-
RewriteMinimumAge: 15 * time.Minute,
353+
Enabled: true,
354+
MinimumSize: 3,
355+
MinimumMVCCGarbageSize: 1,
356+
MaxBlobReferenceDepth: 5,
357+
RewriteMinimumAge: 15 * time.Minute,
357358
}
358359
}
359360
setDefaultExperimentalOpts(opts)

replay/testdata/replay_val_sep

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ tree
1717
0 LOCK
1818
152 MANIFEST-000010
1919
250 MANIFEST-000013
20-
2947 OPTIONS-000003
20+
2977 OPTIONS-000003
2121
0 marker.format-version.000011.024
2222
0 marker.manifest.000003.MANIFEST-000013
2323
simple_val_sep/
@@ -32,7 +32,7 @@ tree
3232
11 000011.log
3333
707 000012.sst
3434
187 MANIFEST-000013
35-
2947 OPTIONS-000003
35+
2977 OPTIONS-000003
3636
0 marker.format-version.000001.024
3737
0 marker.manifest.000001.MANIFEST-000013
3838

@@ -93,6 +93,7 @@ cat build/OPTIONS-000003
9393
[Value Separation]
9494
enabled=true
9595
minimum_size=3
96+
minimum_mvcc_garbage_size=1
9697
max_blob_reference_depth=5
9798
rewrite_minimum_age=15m0s
9899
garbage_ratio_low_priority=0.00

0 commit comments

Comments
 (0)