Skip to content

Commit 6703963

Browse files
committed
storage: raise MemTableStopWritesThreshold when memory permits
When there's sufficient memory available, raise MemTableStopWritesThreshold to as high as 16. We've observed instances of memtable write stalls in practice, especially in the presence of high latencies in cloud networked block devices. If the higher levels are not writing to the storage engine too fast, but flushes are unable to keep up because of disk slowness, we'd rather accumulate additional memtables than forcibly stall writes. Epic: none Fixes: #153673. Release note: None
1 parent 574b651 commit 6703963

File tree

6 files changed

+155
-2
lines changed

6 files changed

+155
-2
lines changed

pkg/server/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,13 +613,15 @@ go_test(
613613
"//pkg/util/tracing",
614614
"//pkg/util/tracing/tracingpb",
615615
"//pkg/util/uuid",
616+
"@com_github_cockroachdb_crlib//crstrings",
616617
"@com_github_cockroachdb_datadriven//:datadriven",
617618
"@com_github_cockroachdb_errors//:errors",
618619
"@com_github_cockroachdb_logtags//:logtags",
619620
"@com_github_cockroachdb_pebble//:pebble",
620621
"@com_github_cockroachdb_pebble//vfs",
621622
"@com_github_cockroachdb_redact//:redact",
622623
"@com_github_dustin_go_humanize//:go-humanize",
624+
"@com_github_ghemawat_stream//:stream",
623625
"@com_github_gogo_protobuf//jsonpb",
624626
"@com_github_gogo_protobuf//proto",
625627
"@com_github_gorilla_mux//:mux",

pkg/server/config.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -743,9 +743,26 @@ func (cfg *Config) CreateEngines(ctx context.Context) (Engines, error) {
743743
if err != nil {
744744
return Engines{}, err
745745
}
746-
747746
log.Event(ctx, "initializing engines")
748747

748+
// The (pebble.Options).MemTableStopWritesThreshold configures the number of
749+
// memtables that may be queued before Pebble induces a write stall.
750+
// Queued memtables consume memory from the block cache, evicting resident
751+
// blocks. If flushes are not keeping up and the count of queued memtables
752+
// grows too large, read performance will degrade severely:
753+
//
754+
// - Every read needs to seek in every queued memtable.
755+
// - Memtables take memory from the block cache, meaning that block
756+
// cache effectiveness decreases the more memtables that are queued.
757+
//
758+
// We constrain the count of queued memtables to be between 4 and 16. Within
759+
// those bounds, we'll grow it to use up to half of the block cache. If
760+
// there are multiple stores, we need to divide that half by the count of
761+
// stores.
762+
stopWritesThreshold := int(cfg.CacheSize/2/storage.DefaultMemtableSize) / len(cfg.Stores.Specs)
763+
stopWritesThreshold = max(stopWritesThreshold, 4)
764+
stopWritesThreshold = min(stopWritesThreshold, 16)
765+
749766
var fileCache *pebble.FileCache
750767
// TODO(radu): use the fileCache for in-memory stores as well.
751768
if physicalStores > 0 {
@@ -783,6 +800,7 @@ func (cfg *Config) CreateEngines(ctx context.Context) (Engines, error) {
783800
storage.Attributes(roachpb.Attributes{Attrs: spec.Attributes}),
784801
storage.If(storeKnobs.SmallEngineBlocks, storage.BlockSize(1)),
785802
storage.BlockConcurrencyLimitDivisor(len(cfg.Stores.Specs)),
803+
storage.MemTableStopWritesThreshold(stopWritesThreshold),
786804
}
787805
if len(storeKnobs.EngineKnobs) > 0 {
788806
storageConfigOpts = append(storageConfigOpts, storeKnobs.EngineKnobs...)

pkg/server/config_test.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,14 @@
66
package server
77

88
import (
9+
"bytes"
910
"context"
11+
"fmt"
12+
"io"
1013
"net"
1114
"os"
1215
"reflect"
16+
"strings"
1317
"sync/atomic"
1418
"testing"
1519
"time"
@@ -27,7 +31,10 @@ import (
2731
"github.com/cockroachdb/cockroach/pkg/util/log"
2832
"github.com/cockroachdb/cockroach/pkg/util/netutil"
2933
"github.com/cockroachdb/cockroach/pkg/util/netutil/addr"
34+
"github.com/cockroachdb/crlib/crstrings"
35+
"github.com/cockroachdb/datadriven"
3036
"github.com/cockroachdb/pebble/vfs"
37+
"github.com/ghemawat/stream"
3138
"github.com/kr/pretty"
3239
"github.com/stretchr/testify/assert"
3340
"github.com/stretchr/testify/require"
@@ -368,3 +375,76 @@ func TestIdProviderServerIdentityString(t *testing.T) {
368375
})
369376
}
370377
}
378+
379+
func TestCreateEngines(t *testing.T) {
380+
defer leaktest.AfterTest(t)()
381+
defer log.Scope(t).Close(t)
382+
383+
specs := map[string]base.StoreSpec{}
384+
385+
datadriven.RunTest(t, "testdata/create_engines", func(t *testing.T, d *datadriven.TestData) string {
386+
switch d.Cmd {
387+
case "store-spec":
388+
var name string
389+
d.ScanArgs(t, "name", &name)
390+
spec := base.StoreSpec{}
391+
spec.Path = name
392+
for _, line := range crstrings.Lines(d.Input) {
393+
parts := strings.SplitN(line, "=", 2)
394+
switch parts[0] {
395+
case "in-memory":
396+
spec.InMemory = true
397+
case "attrs":
398+
spec.Attributes = strings.Split(parts[1], ":")
399+
default:
400+
return fmt.Sprintf("unknown field: %q", parts[0])
401+
}
402+
}
403+
specs[name] = spec
404+
return ""
405+
case "create-engines":
406+
var cfg Config
407+
cfg.Settings = cluster.MakeTestingClusterSettings()
408+
409+
var pattern string
410+
d.ScanArgs(t, "pattern", &pattern)
411+
var specNames []string
412+
d.ScanArgs(t, "specs", &specNames)
413+
for _, specName := range specNames {
414+
spec, ok := specs[specName]
415+
if !ok {
416+
return fmt.Sprintf("unknown store: %q", specName)
417+
}
418+
cfg.Stores.Specs = append(cfg.Stores.Specs, spec)
419+
}
420+
d.MaybeScanArgs(t, "CacheSize", &cfg.CacheSize)
421+
422+
engines, err := cfg.CreateEngines(context.Background())
423+
if err != nil {
424+
return fmt.Sprintf("failed to create engines: %v", err)
425+
}
426+
defer engines.Close()
427+
428+
var buf bytes.Buffer
429+
for _, e := range engines {
430+
buf.WriteString(strings.TrimSpace(e.GetPebbleOptions().String()))
431+
buf.WriteString("\n")
432+
}
433+
return grepStr(&buf, pattern)
434+
default:
435+
return fmt.Sprintf("unknown command: %q", d.Cmd)
436+
}
437+
})
438+
}
439+
440+
func grepStr(r io.Reader, pattern string) string {
441+
var buf bytes.Buffer
442+
if err := stream.Run(stream.Sequence(
443+
stream.ReadLines(r),
444+
stream.Grep(pattern),
445+
stream.WriteLines(&buf),
446+
)); err != nil {
447+
return err.Error()
448+
}
449+
return buf.String()
450+
}

pkg/server/testdata/create_engines

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
store-spec name=a
2+
in-memory
3+
----
4+
5+
store-spec name=b
6+
in-memory
7+
----
8+
9+
# 64 MiB cache; should result in 4 memtable stop writes threshold.
10+
11+
create-engines specs=(a,b) pattern=mem_table_stop_writes_threshold CacheSize=67108864
12+
----
13+
mem_table_stop_writes_threshold=4
14+
mem_table_stop_writes_threshold=4
15+
16+
create-engines specs=(a) pattern=mem_table_stop_writes_threshold CacheSize=67108864
17+
----
18+
mem_table_stop_writes_threshold=4
19+
20+
# 1 GiB cache; should result in 8 memtable stop writes threshold with a single
21+
# store (with 64 MiB memtables).
22+
23+
create-engines specs=(b) pattern=mem_table_stop_writes_threshold CacheSize=1073741824
24+
----
25+
mem_table_stop_writes_threshold=8
26+
27+
# But if we add another store, the memtable stop writes threshold should be
28+
# halved back to the minimum of 4.
29+
30+
create-engines specs=(a, b) pattern=mem_table_stop_writes_threshold CacheSize=1073741824
31+
----
32+
mem_table_stop_writes_threshold=4
33+
mem_table_stop_writes_threshold=4
34+
35+
# 64 GiB cache; should result in 16 memtable stop writes threshold.
36+
37+
create-engines specs=(a,b) pattern=mem_table_stop_writes_threshold CacheSize=68719476736
38+
----
39+
mem_table_stop_writes_threshold=16
40+
mem_table_stop_writes_threshold=16

pkg/storage/open.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,16 @@ func MemtableSize(bytes uint64) ConfigOption {
215215
}
216216
}
217217

218+
// MemTableStopWritesThreshold configures the number of memtables that can be
219+
// queued before Pebble will induce a write stall, preventing all batch commits
220+
// from proceeding until a flush completes.
221+
func MemTableStopWritesThreshold(n int) ConfigOption {
222+
return func(cfg *engineConfig) error {
223+
cfg.opts.MemTableStopWritesThreshold = n
224+
return nil
225+
}
226+
}
227+
218228
// L0CompactionThreshold configures the amount of L0 read-amplification
219229
// necessary to trigger an L0 compaction.
220230
func L0CompactionThreshold(n int) ConfigOption {

pkg/storage/pebble.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,9 @@ var MVCCMerger = &pebble.Merger{
519519
},
520520
}
521521

522+
// DefaultMemtableSize is the default size of a memtable, in bytes (64 MiB).
523+
const DefaultMemtableSize = 64 << 20 // 64 MB
524+
522525
const mvccWallTimeIntervalCollector = "MVCCTimeInterval"
523526

524527
// DefaultPebbleOptions returns the default pebble options.
@@ -532,7 +535,7 @@ func DefaultPebbleOptions() *pebble.Options {
532535
L0CompactionThreshold: 2,
533536
L0StopWritesThreshold: 1000,
534537
LBaseMaxBytes: 64 << 20, // 64 MB
535-
MemTableSize: 64 << 20, // 64 MB
538+
MemTableSize: DefaultMemtableSize,
536539
MemTableStopWritesThreshold: 4,
537540
Merger: MVCCMerger,
538541
BlockPropertyCollectors: cockroachkvs.BlockPropertyCollectors,

0 commit comments

Comments
 (0)