Skip to content

Commit 7eba613

Browse files
craig[bot] and RaduBerinde committed
Merge #145301
145301: storage: bump Pebble and add cluster settings for compaction concurrency r=RaduBerinde a=RaduBerinde #### go.mod: bump Pebble to 16f3b5764d35 Changes: * [`16f3b576`](cockroachdb/pebble@16f3b576) db: change MaxConcurrentCompactions() to return a range Release note: none. Epic: none. #### storage: fix up compaction concurrency code #### storage: add cluster settings for compaction concurrency Add two cluster settings that control the compaction concurrency limits: `storage.compaction_concurrency` and `storage.max_compaction_concurrency`. Fixes: #144963 Release note: None Co-authored-by: Radu Berinde <[email protected]>
2 parents a75d3b3 + 9658822 commit 7eba613

File tree

9 files changed

+189
-95
lines changed

9 files changed

+189
-95
lines changed

DEPS.bzl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1915,10 +1915,10 @@ def go_deps():
19151915
patches = [
19161916
"@com_github_cockroachdb_cockroach//build/patches:com_github_cockroachdb_pebble.patch",
19171917
],
1918-
sha256 = "37cf022e26e06aed9fceeaa5ba619d8b3fecc3a30b432db16cbd2f020e35db64",
1919-
strip_prefix = "github.com/cockroachdb/pebble@v0.0.0-20250428185656-a909ca0112a8",
1918+
sha256 = "0f33543d6861086e7db30dade452a9ac334417664b6dce42ed976303c0c41e4b",
1919+
strip_prefix = "github.com/cockroachdb/pebble@v0.0.0-20250429172450-e90517277fca",
19201920
urls = [
1921-
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20250428185656-a909ca0112a8.zip",
1921+
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20250429172450-e90517277fca.zip",
19221922
],
19231923
)
19241924
go_repository(

build/bazelutil/distdir_files.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ DISTDIR_FILES = {
365365
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/gostdlib/com_github_cockroachdb_gostdlib-v1.19.0.zip": "c4d516bcfe8c07b6fc09b8a9a07a95065b36c2855627cb3514e40c98f872b69e",
366366
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/logtags/com_github_cockroachdb_logtags-v0.0.0-20241215232642-bb51bb14a506.zip": "920068af09e3846d9ebb4e4a7787ff1dd10f3989c5f940ad861b0f6a9f824f6e",
367367
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/metamorphic/com_github_cockroachdb_metamorphic-v0.0.0-20231108215700-4ba948b56895.zip": "28c8cf42192951b69378cf537be5a9a43f2aeb35542908cc4fe5f689505853ea",
368-
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20250428185656-a909ca0112a8.zip": "37cf022e26e06aed9fceeaa5ba619d8b3fecc3a30b432db16cbd2f020e35db64",
368+
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20250429172450-e90517277fca.zip": "0f33543d6861086e7db30dade452a9ac334417664b6dce42ed976303c0c41e4b",
369369
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/redact/com_github_cockroachdb_redact-v1.1.6.zip": "018eccb5fb9ca52d43ec9eaf213539d01c1f2b94e0e822406ebfb2e9321ef6cf",
370370
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/returncheck/com_github_cockroachdb_returncheck-v0.0.0-20200612231554-92cdbca611dd.zip": "ce92ba4352deec995b1f2eecf16eba7f5d51f5aa245a1c362dfe24c83d31f82b",
371371
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/stress/com_github_cockroachdb_stress-v0.0.0-20220803192808-1806698b1b7b.zip": "3fda531795c600daf25532a4f98be2a1335cd1e5e182c72789bca79f5f69fcc1",

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ require (
138138
github.com/cockroachdb/go-test-teamcity v0.0.0-20191211140407-cff980ad0a55
139139
github.com/cockroachdb/gostdlib v1.19.0
140140
github.com/cockroachdb/logtags v0.0.0-20241215232642-bb51bb14a506
141-
github.com/cockroachdb/pebble v0.0.0-20250428185656-a909ca0112a8
141+
github.com/cockroachdb/pebble v0.0.0-20250429172450-e90517277fca
142142
github.com/cockroachdb/redact v1.1.6
143143
github.com/cockroachdb/returncheck v0.0.0-20200612231554-92cdbca611dd
144144
github.com/cockroachdb/stress v0.0.0-20220803192808-1806698b1b7b

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -593,8 +593,8 @@ github.com/cockroachdb/logtags v0.0.0-20241215232642-bb51bb14a506 h1:ASDL+UJcILM
593593
github.com/cockroachdb/logtags v0.0.0-20241215232642-bb51bb14a506/go.mod h1:Mw7HqKr2kdtu6aYGn3tPmAftiP3QPX63LdK/zcariIo=
594594
github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895 h1:XANOgPYtvELQ/h4IrmPAohXqe2pWA8Bwhejr3VQoZsA=
595595
github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895/go.mod h1:aPd7gM9ov9M8v32Yy5NJrDyOcD8z642dqs+F0CeNXfA=
596-
github.com/cockroachdb/pebble v0.0.0-20250428185656-a909ca0112a8 h1:te6dfVW4VOJCOEbL2qXi+vMuzxuNrtkiDXLBcyz8K5I=
597-
github.com/cockroachdb/pebble v0.0.0-20250428185656-a909ca0112a8/go.mod h1:jyGqUZ9jl6TVX9HHi12f0JQ+h+oL75De//O43X898BA=
596+
github.com/cockroachdb/pebble v0.0.0-20250429172450-e90517277fca h1:Sc3bEuEcSAc8NijqNSE8WxyEZj0SYysK/wPJ5MNDTpk=
597+
github.com/cockroachdb/pebble v0.0.0-20250429172450-e90517277fca/go.mod h1:jyGqUZ9jl6TVX9HHi12f0JQ+h+oL75De//O43X898BA=
598598
github.com/cockroachdb/redact v1.1.3/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg=
599599
github.com/cockroachdb/redact v1.1.6 h1:zXJBwDZ84xJNlHl1rMyCojqyIxv+7YUpQiJLQ7n4314=
600600
github.com/cockroachdb/redact v1.1.6/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg=

pkg/storage/metamorphic/options.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ func randomOptions() *pebble.Options {
127127
opts.MemTableSize = 1 << rngIntRange(rng, 11, 28)
128128
opts.MemTableStopWritesThreshold = int(rngIntRange(rng, 2, 7))
129129
maxConcurrentCompactions := int(rngIntRange(rng, 1, 4))
130-
opts.MaxConcurrentCompactions = func() int { return maxConcurrentCompactions }
130+
opts.CompactionConcurrencyRange = func() (lower, upper int) { return 1, maxConcurrentCompactions }
131131

132132
opts.Cache = pebble.NewCache(1 << rngIntRange(rng, 1, 30))
133133
defer opts.Cache.Unref()

pkg/storage/mvcc.go

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,21 @@ var TargetBytesPerLockConflictError = settings.RegisterIntSetting(
103103

104104
var defaultMaxConcurrentCompactions = getDefaultMaxConcurrentCompactions()
105105

106+
// By default, we use up to min(GOMAXPROCS-1, 3) threads for background
107+
// compactions per store (reserving the final process for flushes).
106108
func getDefaultMaxConcurrentCompactions() int {
109+
const def = 3
110+
if n := runtime.GOMAXPROCS(0); n-1 < def {
111+
return max(n-1, 1)
112+
}
113+
return def
114+
}
115+
116+
// envMaxConcurrentCompactions is not zero if this node has an env var override
117+
// for the concurrency.
118+
var envMaxConcurrentCompactions = getMaxConcurrentCompactionsFromEnv()
119+
120+
func getMaxConcurrentCompactionsFromEnv() int {
107121
if v := envutil.EnvOrDefaultInt("COCKROACH_CONCURRENT_COMPACTIONS", 0); v > 0 {
108122
return v
109123
}
@@ -119,19 +133,31 @@ func getDefaultMaxConcurrentCompactions() int {
119133
if oldV := envutil.EnvOrDefaultInt("COCKROACH_ROCKSDB_CONCURRENCY", 0); oldV > 0 {
120134
return max(oldV-1, 1)
121135
}
136+
return 0
137+
}
122138

123-
// By default use up to min(GOMAXPROCS-1, 3) threads for background
124-
// compactions per store (reserving the final process for flushes).
125-
const upperLimit = 3
126-
if n := runtime.GOMAXPROCS(0); n-1 < upperLimit {
127-
return max(n-1, 1)
139+
// determineMaxConcurrentCompactions determines the upper limit on compaction
140+
// concurrency.
141+
//
142+
// Normally, we use the default limit of min(3, numCPU-1). This limit can be
143+
// changed via an environment variable or via a cluster setting. If both of
144+
// those are used, the maximum of them is taken.
145+
func determineMaxConcurrentCompactions(defaultValue int, envValue int, clusterSetting int) int {
146+
if envValue > 0 {
147+
if clusterSetting > 0 {
148+
return max(envValue, clusterSetting)
149+
}
150+
return envValue
151+
}
152+
if clusterSetting > 0 {
153+
return clusterSetting
128154
}
129-
return upperLimit
155+
return defaultValue
130156
}
131157

132158
// l0SubLevelCompactionConcurrency is the sub-level threshold at which to
133159
// allow an increase in compaction concurrency. The maximum is still
134-
// controlled by pebble.Options.MaxConcurrentCompactions. The default of 2
160+
// controlled by pebble.Options.CompactionConcurrencyRange. The default of 2
135161
// allows an additional compaction (so total 1 + 1 = 2 compactions) when the
136162
// sub-level count is 2, and increments concurrency by 1 whenever sub-level
137163
// count increases by 2 (so 1 + 2 = 3 compactions) when sub-level count is 4,

pkg/storage/open.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ func RemoteStorageFactory(accessor *cloud.EarlyBootExternalStorageAccessor) Conf
202202
// compactions an Engine will execute.
203203
func MaxConcurrentCompactions(n int) ConfigOption {
204204
return func(cfg *engineConfig) error {
205-
cfg.opts.MaxConcurrentCompactions = func() int { return n }
205+
cfg.opts.CompactionConcurrencyRange = func() (lower, upper int) { return 1, n }
206206
return nil
207207
}
208208
}

pkg/storage/pebble.go

Lines changed: 74 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,50 @@ var walFailoverUnhealthyOpThreshold = settings.RegisterDurationSetting(
282282
settings.WithPublic,
283283
)
284284

285+
// This cluster setting controls the baseline compaction concurrency (which
286+
// Pebble can dynamically increase up to the max concurrency; see
287+
// CompactionConcurrencyRange).
288+
//
289+
// When the value of this cluster setting is larger than the max concurrency
290+
// (see below), this cluster setting takes precedence (i.e. the max concurrency
291+
// will also equal this value).
292+
//
293+
// The baseline compaction concurrency is temporarily overridden while
294+
// crdb_internal.set_compaction_concurrency is running (which uses
295+
// SetCompactionConcurrency).
296+
var compactionConcurrencyLower = settings.RegisterIntSetting(
297+
settings.ApplicationLevel,
298+
"storage.compaction_concurrency",
299+
"the baseline number of concurrent compactions",
300+
1,
301+
settings.IntWithMinimum(1),
302+
)
303+
304+
// The maximum concurrency can be configured via an env var (which allows
305+
// per-node control) and/or this cluster setting (which applies to the entire
306+
// cluster).
307+
//
308+
// The environment variable is COCKROACH_CONCURRENT_COMPACTIONS (we also support
309+
// a deprecated variable, see getMaxConcurrentCompactionsFromEnv()).
310+
//
311+
// In the absence of any configuration, we use min(GOMAXPROCS-1, 3).
312+
//
313+
// If both the env variable and cluster setting are set, we take the maximum of
314+
// the two.
315+
//
316+
// In all cases, the maximum concurrency is temporarily overridden while
317+
// crdb_internal.set_compaction_concurrency is running (which uses
318+
// SetCompactionConcurrency).
319+
var compactionConcurrencyUpper = settings.RegisterIntSetting(
320+
settings.ApplicationLevel,
321+
"storage.max_compaction_concurrency",
322+
"the maximum number of concurrent compactions (0 = default); the default value is "+
323+
"min(3,numCPUs-1) or what the COCKROACH_CONCURRENT_COMPACTIONS env var specifies; "+
324+
"the env var also takes precedence over the cluster setting when the latter is lower",
325+
0,
326+
settings.NonNegativeInt,
327+
)
328+
285329
// TODO(ssd): This could be SystemOnly but we currently init pebble
286330
// engines for temporary storage. Temporary engines shouldn't really
287331
// care about download compactions, but they do currently simply
@@ -360,14 +404,10 @@ func DefaultPebbleOptions() *pebble.Options {
360404
KeySchema: DefaultKeySchema,
361405
KeySchemas: sstable.MakeKeySchemas(KeySchemas...),
362406
// A value of 2 triggers a compaction when there is 1 sub-level.
363-
L0CompactionThreshold: 2,
364-
L0StopWritesThreshold: 1000,
365-
LBaseMaxBytes: 64 << 20, // 64 MB
366-
Levels: make([]pebble.LevelOptions, 7),
367-
// NB: Options.MaxConcurrentCompactions may be "wrapped" in NewPebble to
368-
// allow overriding the max at runtime through
369-
// Engine.SetCompactionConcurrency.
370-
MaxConcurrentCompactions: func() int { return defaultMaxConcurrentCompactions },
407+
L0CompactionThreshold: 2,
408+
L0StopWritesThreshold: 1000,
409+
LBaseMaxBytes: 64 << 20, // 64 MB
410+
Levels: make([]pebble.LevelOptions, 7),
371411
MemTableSize: 64 << 20, // 64 MB
372412
MemTableStopWritesThreshold: 4,
373413
Merger: MVCCMerger,
@@ -676,17 +716,26 @@ func newPebble(ctx context.Context, cfg engineConfig) (p *Pebble, err error) {
676716
return getCompressionAlgorithm(ctx, cfg.settings, CompressionAlgorithmStorage)
677717
}
678718
}
679-
719+
// Note: the CompactionConcurrencyRange function will be wrapped below to
720+
// allow overriding the lower and upper values at runtime through
721+
// Engine.SetCompactionConcurrency.
722+
if cfg.opts.CompactionConcurrencyRange == nil {
723+
cfg.opts.CompactionConcurrencyRange = func() (lower, upper int) {
724+
lower = int(compactionConcurrencyLower.Get(&cfg.settings.SV))
725+
upper = determineMaxConcurrentCompactions(
726+
defaultMaxConcurrentCompactions,
727+
envMaxConcurrentCompactions,
728+
int(compactionConcurrencyUpper.Get(&cfg.settings.SV)),
729+
)
730+
return lower, max(lower, upper)
731+
}
732+
}
680733
if cfg.opts.MaxConcurrentDownloads == nil {
681734
cfg.opts.MaxConcurrentDownloads = func() int {
682735
return int(concurrentDownloadCompactions.Get(&cfg.settings.SV))
683736
}
684737
}
685738

686-
if cfg.opts.MaxConcurrentCompactions == nil {
687-
cfg.opts.MaxConcurrentCompactions = func() int { return defaultMaxConcurrentCompactions }
688-
}
689-
690739
cfg.opts.EnsureDefaults()
691740

692741
// The context dance here is done so that we have a clean context without
@@ -791,11 +840,9 @@ func newPebble(ctx context.Context, cfg engineConfig) (p *Pebble, err error) {
791840
diskWriteStatsCollector: cfg.DiskWriteStatsCollector,
792841
}
793842

794-
// MaxConcurrentCompactions can be set by multiple sources, but all the
795-
// sources will eventually call NewPebble. So, we override
796-
// cfg.opts.MaxConcurrentCompactions to a closure which allows ovderriding the
797-
// value.
798-
cfg.opts.MaxConcurrentCompactions = p.cco.Wrap(cfg.opts.MaxConcurrentCompactions)
843+
// Wrap the CompactionConcurrencyRange function to allow overriding the lower
844+
// and upper values at runtime through Engine.SetCompactionConcurrency.
845+
cfg.opts.CompactionConcurrencyRange = p.cco.Wrap(cfg.opts.CompactionConcurrencyRange)
799846

800847
// NB: The ordering of the event listeners passed to TeeEventListener is
801848
// deliberate. The listener returned by makeMetricEtcEventListener is
@@ -2740,8 +2787,7 @@ func (e *ExceedMaxSizeError) Error() string {
27402787
return fmt.Sprintf("export size (%d bytes) exceeds max size (%d bytes)", e.reached, e.maxSize)
27412788
}
27422789

2743-
// compactionConcurrencyOverride allows overriding the max concurrent
2744-
// compactions.
2790+
// compactionConcurrencyOverride allows overriding the compaction concurrency.
27452791
type compactionConcurrencyOverride struct {
27462792
override atomic.Uint32
27472793
}
@@ -2751,12 +2797,15 @@ func (cco *compactionConcurrencyOverride) Set(value uint32) {
27512797
cco.override.Store(value)
27522798
}
27532799

2754-
// Wrap a MaxConcurrentCompactions function to take into account the override.
2755-
func (cco *compactionConcurrencyOverride) Wrap(maxConcurrentCompactions func() int) func() int {
2756-
return func() int {
2800+
// Wrap a CompactionConcurrencyRange function to take into account the override.
2801+
func (cco *compactionConcurrencyOverride) Wrap(
2802+
compactionConcurrencyRange func() (lower, upper int),
2803+
) func() (lower, upper int) {
2804+
return func() (lower, upper int) {
27572805
if o := cco.override.Load(); o > 0 {
2758-
return int(o)
2806+
// We override both the lower and upper limits.
2807+
return int(o), int(o)
27592808
}
2760-
return maxConcurrentCompactions()
2809+
return compactionConcurrencyRange()
27612810
}
27622811
}

0 commit comments

Comments
 (0)