Skip to content

Commit d05c0fe

Browse files
ronak-07 authored and
facebook-github-bot committed
Option to make write group size configurable (facebook#5759)
Summary: The maximum batch size that can be written to the WAL is currently determined in a static manner. If the leader write is at most 128 KB, the batch size limit is the leader write size + 128 KB; otherwise the limit is 1 MB. Both values are hard-coded. This change makes the limit configurable through the new option max_write_batch_group_size_bytes. Pull Request resolved: facebook#5759 Differential Revision: D17329298 fbshipit-source-id: a3d910629d8d8ca84ea39ad89c2b2d284571ded5
1 parent 9eb3e1f commit d05c0fe

File tree

7 files changed

+35
-6
lines changed

7 files changed

+35
-6
lines changed

db/write_thread.cc

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ WriteThread::WriteThread(const ImmutableDBOptions& db_options)
2222
allow_concurrent_memtable_write_(
2323
db_options.allow_concurrent_memtable_write),
2424
enable_pipelined_write_(db_options.enable_pipelined_write),
25+
max_write_batch_group_size_bytes(
26+
db_options.max_write_batch_group_size_bytes),
2527
newest_writer_(nullptr),
2628
newest_memtable_writer_(nullptr),
2729
last_sequence_(0),
@@ -406,9 +408,10 @@ size_t WriteThread::EnterAsBatchGroupLeader(Writer* leader,
406408
// Allow the group to grow up to a maximum size, but if the
407409
// original write is small, limit the growth so we do not slow
408410
// down the small write too much.
409-
size_t max_size = 1 << 20;
410-
if (size <= (128 << 10)) {
411-
max_size = size + (128 << 10);
411+
size_t max_size = max_write_batch_group_size_bytes;
412+
const uint64_t min_batch_size_bytes = max_write_batch_group_size_bytes / 8;
413+
if (size <= min_batch_size_bytes) {
414+
max_size = size + min_batch_size_bytes;
412415
}
413416

414417
leader->write_group = write_group;
@@ -485,9 +488,10 @@ void WriteThread::EnterAsMemTableWriter(Writer* leader,
485488
// Allow the group to grow up to a maximum size, but if the
486489
// original write is small, limit the growth so we do not slow
487490
// down the small write too much.
488-
size_t max_size = 1 << 20;
489-
if (size <= (128 << 10)) {
490-
max_size = size + (128 << 10);
491+
size_t max_size = max_write_batch_group_size_bytes;
492+
const uint64_t min_batch_size_bytes = max_write_batch_group_size_bytes / 8;
493+
if (size <= min_batch_size_bytes) {
494+
max_size = size + min_batch_size_bytes;
491495
}
492496

493497
leader->write_group = write_group;

db/write_thread.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,11 @@ class WriteThread {
360360
// Enable pipelined write to WAL and memtable.
361361
const bool enable_pipelined_write_;
362362

363+
// The maximum number of bytes that are written in a single batch
364+
// of WAL or memtable write. It is followed when the leader write size
365+
// is larger than 1/8 of this limit.
366+
const uint64_t max_write_batch_group_size_bytes;
367+
363368
// Points to the newest pending writer. Only leader can remove
364369
// elements, adding can be done lock-free by anybody.
365370
std::atomic<Writer*> newest_writer_;

include/rocksdb/options.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -953,6 +953,13 @@ struct DBOptions {
953953
// Default: true
954954
bool enable_write_thread_adaptive_yield = true;
955955

956+
// The maximum number of bytes that are written in a single batch
957+
// of WAL or memtable write. It is followed when the leader write size
958+
// is larger than 1/8 of this limit.
959+
//
960+
// Default: 1 MB
961+
uint64_t max_write_batch_group_size_bytes = 1 << 20;
962+
956963
// The maximum number of microseconds that a write operation will use
957964
// a yielding spin loop to coordinate with other write threads before
958965
// blocking on a mutex. (Assuming write_thread_slow_yield_usec is

options/db_options.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
4444
table_cache_numshardbits(options.table_cache_numshardbits),
4545
wal_ttl_seconds(options.WAL_ttl_seconds),
4646
wal_size_limit_mb(options.WAL_size_limit_MB),
47+
max_write_batch_group_size_bytes(
48+
options.max_write_batch_group_size_bytes),
4749
manifest_preallocation_size(options.manifest_preallocation_size),
4850
allow_mmap_reads(options.allow_mmap_reads),
4951
allow_mmap_writes(options.allow_mmap_writes),
@@ -153,6 +155,10 @@ void ImmutableDBOptions::Dump(Logger* log) const {
153155
ROCKS_LOG_HEADER(log,
154156
" Options.WAL_size_limit_MB: %" PRIu64,
155157
wal_size_limit_mb);
158+
ROCKS_LOG_HEADER(log,
159+
" "
160+
"Options.max_write_batch_group_size_bytes: %" PRIu64,
161+
max_write_batch_group_size_bytes);
156162
ROCKS_LOG_HEADER(
157163
log, " Options.manifest_preallocation_size: %" ROCKSDB_PRIszt,
158164
manifest_preallocation_size);

options/db_options.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ struct ImmutableDBOptions {
4343
int table_cache_numshardbits;
4444
uint64_t wal_ttl_seconds;
4545
uint64_t wal_size_limit_mb;
46+
uint64_t max_write_batch_group_size_bytes;
4647
size_t manifest_preallocation_size;
4748
bool allow_mmap_reads;
4849
bool allow_mmap_writes;

options/options_helper.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
110110
immutable_db_options.allow_concurrent_memtable_write;
111111
options.enable_write_thread_adaptive_yield =
112112
immutable_db_options.enable_write_thread_adaptive_yield;
113+
options.max_write_batch_group_size_bytes =
114+
immutable_db_options.max_write_batch_group_size_bytes;
113115
options.write_thread_max_yield_usec =
114116
immutable_db_options.write_thread_max_yield_usec;
115117
options.write_thread_slow_yield_usec =
@@ -1611,6 +1613,9 @@ std::unordered_map<std::string, OptionTypeInfo>
16111613
{"write_thread_slow_yield_usec",
16121614
{offsetof(struct DBOptions, write_thread_slow_yield_usec),
16131615
OptionType::kUInt64T, OptionVerificationType::kNormal, false, 0}},
1616+
{"max_write_batch_group_size_bytes",
1617+
{offsetof(struct DBOptions, max_write_batch_group_size_bytes),
1618+
OptionType::kUInt64T, OptionVerificationType::kNormal, false, 0}},
16141619
{"write_thread_max_yield_usec",
16151620
{offsetof(struct DBOptions, write_thread_max_yield_usec),
16161621
OptionType::kUInt64T, OptionVerificationType::kNormal, false, 0}},

options/options_settable_test.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
229229
"delete_obsolete_files_period_micros=4294967758;"
230230
"WAL_ttl_seconds=4295008036;"
231231
"WAL_size_limit_MB=4295036161;"
232+
"max_write_batch_group_size_bytes=1048576;"
232233
"wal_dir=path/to/wal_dir;"
233234
"db_write_buffer_size=2587;"
234235
"max_subcompactions=64330;"

0 commit comments

Comments
 (0)