Skip to content

Commit 8c491bb

Browse files
committed
WAL: remove write strategies.
These have not proved useful o_sync perform very badly and sync_after_notify only provides benefit when there is hardly any activity. It is better to settle for a more predictable mode like the default write + sync provides.
1 parent a2db6e1 commit 8c491bb

File tree

6 files changed

+28
-94
lines changed

6 files changed

+28
-94
lines changed

README.md

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -348,20 +348,6 @@ is available in a separate repository.
348348
<td>Indicate whether the wal should compute and validate checksums. Default: `true`</td>
349349
<td>Boolean</td>
350350
</tr>
351-
<tr>
352-
<td>wal_write_strategy</td>
353-
<td>
354-
<ul>
355-
<li>
356-
<code>default</code>: used by default. <code>write(2)</code> system calls are delayed until a buffer is due to be flushed. Then it writes all the data in a single call then <code>fsync</code>s. Fastest option but incurs some additional memory use.
357-
</li>
358-
<li>
359-
<code>o_sync</code>: Like default but will try to open the file with O_SYNC and thus won't need the additional <code>fsync(2)</code> system call. If it fails to open the file with this flag this mode falls back to default.
360-
</li>
361-
</ul>
362-
</td>
363-
<td>Enumeration: <code>default</code> | <code>o_sync</code></td>
364-
</tr>
365351
<tr>
366352
<td>wal_sync_method</td>
367353
<td>

src/ra.erl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,7 @@
8585
{wal_data_dir, file:filename()} |
8686
{segment_max_entries, non_neg_integer()} |
8787
{wal_max_size_bytes, non_neg_integer()} |
88-
{wal_compute_checksums, boolean()} |
89-
{wal_write_strategy, default | o_sync}.
88+
{wal_compute_checksums, boolean()}.
9089

9190
-type query_fun() :: fun((term()) -> term()) |
9291
{M :: module(), F :: atom(), A :: list()}.

src/ra_log_sup.erl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ make_wal_conf(#{data_dir := DataDir,
6565
MaxBatchSize = maps:get(wal_max_batch_size, Cfg,
6666
?WAL_DEFAULT_MAX_BATCH_SIZE),
6767
MaxEntries = maps:get(wal_max_entries, Cfg, undefined),
68-
Strategy = maps:get(wal_write_strategy, Cfg, default),
6968
SyncMethod = maps:get(wal_sync_method, Cfg, datasync),
7069
HibAfter = maps:get(wal_hibernate_after, Cfg, undefined),
7170
Gc = maps:get(wal_garbage_collect, Cfg, false),
@@ -78,7 +77,6 @@ make_wal_conf(#{data_dir := DataDir,
7877
dir => WalDir,
7978
segment_writer => SegWriterName,
8079
compute_checksums => ComputeChecksums,
81-
write_strategy => Strategy,
8280
max_size_bytes => MaxSizeBytes,
8381
max_entries => MaxEntries,
8482
sync_method => SyncMethod,

src/ra_log_wal.erl

Lines changed: 11 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -67,16 +67,6 @@
6767
pending = [] :: iolist()
6868
}).
6969

70-
-type wal_write_strategy() ::
71-
% writes all pending in one write(2) call then calls fsync(1)
72-
default |
73-
% like default but tries to open the file using synchronous io
74-
% (O_SYNC) rather than a write(2) followed by an fsync.
75-
o_sync |
76-
%% low latency mode where writers are notifies _before_ syncing
77-
%% but after writing.
78-
sync_after_notify.
79-
8070
-type writer_name_cache() :: {NextIntId :: non_neg_integer(),
8171
#{writer_id() => binary()}}.
8272

@@ -86,7 +76,6 @@
8676
max_size_bytes :: non_neg_integer(),
8777
max_entries :: undefined | non_neg_integer(),
8878
recovery_chunk_size = ?WAL_RECOVERY_CHUNK_SIZE :: non_neg_integer(),
89-
write_strategy = default :: wal_write_strategy(),
9079
sync_method = datasync :: sync | datasync | none,
9180
counter :: counters:counters_ref(),
9281
mem_tables_tid :: ets:tid(),
@@ -139,7 +128,6 @@
139128
segment_writer => atom() | pid(),
140129
compute_checksums => boolean(),
141130
pre_allocate => boolean(),
142-
write_strategy => wal_write_strategy(),
143131
sync_method => sync | datasync,
144132
recovery_chunk_size => non_neg_integer(),
145133
hibernate_after => non_neg_integer(),
@@ -149,8 +137,7 @@
149137
min_bin_vheap_size => non_neg_integer()
150138
}.
151139

152-
-export_type([wal_conf/0,
153-
wal_write_strategy/0]).
140+
-export_type([wal_conf/0]).
154141

155142
-type wal_command() ::
156143
{append | truncate, writer_id(), ra_index(), ra_term(), term()}.
@@ -261,7 +248,6 @@ init(#{dir := Dir} = Conf0) ->
261248
segment_writer := SegWriter,
262249
compute_checksums := ComputeChecksums,
263250
pre_allocate := PreAllocate,
264-
write_strategy := WriteStrategy,
265251
sync_method := SyncMethod,
266252
garbage_collect := Gc,
267253
min_heap_size := MinHeapSize,
@@ -285,7 +271,6 @@ init(#{dir := Dir} = Conf0) ->
285271
max_size_bytes = max(?WAL_MIN_SIZE, MaxWalSize),
286272
max_entries = MaxEntries,
287273
recovery_chunk_size = RecoveryChunkSize,
288-
write_strategy = WriteStrategy,
289274
sync_method = SyncMethod,
290275
counter = CRef,
291276
mem_tables_tid = ets:whereis(MemTablesName),
@@ -299,7 +284,9 @@ init(#{dir := Dir} = Conf0) ->
299284
% generated during recovery
300285
ok = ra_log_segment_writer:await(SegWriter),
301286
{ok, Result}
302-
catch _:Err:_Stack ->
287+
catch _:Err:Stack ->
288+
?ERROR("WAL: failed to initialise with ~p, stack ~p",
289+
[Err, Stack]),
303290
{stop, Err}
304291
end.
305292

@@ -322,16 +309,14 @@ terminate(Reason, State) ->
322309
_ = cleanup(State),
323310
ok.
324311

325-
format_status(#state{conf = #conf{write_strategy = Strat,
326-
sync_method = SyncMeth,
312+
format_status(#state{conf = #conf{sync_method = SyncMeth,
327313
compute_checksums = Cs,
328314
names = #{wal := WalName},
329315
max_size_bytes = MaxSize},
330316
writers = Writers,
331317
wal = #wal{file_size = FSize,
332318
filename = Fn}}) ->
333-
#{write_strategy => Strat,
334-
sync_method => SyncMeth,
319+
#{sync_method => SyncMeth,
335320
compute_checksums => Cs,
336321
writers => maps:size(Writers),
337322
filename => filename:basename(Fn),
@@ -595,20 +580,6 @@ roll_over(#state{wal = Wal0, file_num = Num0,
595580
wal = Wal,
596581
file_num = Num}.
597582

598-
open_wal(File, Max, #conf{write_strategy = o_sync} = Conf) ->
599-
Modes = [sync | ?FILE_MODES],
600-
case prepare_file(File, Modes) of
601-
{ok, Fd} ->
602-
% many platforms implement O_SYNC a bit like O_DSYNC
603-
% perform a manual sync here to ensure metadata is flushed
604-
{Conf, #wal{fd = Fd,
605-
max_size = Max,
606-
filename = File}};
607-
{error, enotsup} ->
608-
?WARN("wal: o_sync write strategy not supported. "
609-
"Reverting back to default strategy.", []),
610-
open_wal(File, Max, Conf#conf{write_strategy = default})
611-
end;
612583
open_wal(File, Max, #conf{} = Conf0) ->
613584
{ok, Fd} = prepare_file(File, ?FILE_MODES),
614585
Conf = maybe_pre_allocate(Conf0, Fd, Max),
@@ -637,9 +608,7 @@ make_tmp(File) ->
637608
ok = file:close(Fd),
638609
Tmp.
639610

640-
maybe_pre_allocate(#conf{pre_allocate = true,
641-
write_strategy = Strat} = Conf, Fd, Max0)
642-
when Strat /= o_sync ->
611+
maybe_pre_allocate(#conf{pre_allocate = true} = Conf, Fd, Max0) ->
643612
Max = Max0 - ?HEADER_SIZE,
644613
case file:allocate(Fd, ?HEADER_SIZE, Max) of
645614
ok ->
@@ -654,7 +623,7 @@ maybe_pre_allocate(#conf{pre_allocate = true,
654623
" falling back to fsync instead of fdatasync", []),
655624
Conf#conf{pre_allocate = false}
656625
end;
657-
maybe_pre_allocate(Conf, _Fd, _Max) ->
626+
maybe_pre_allocate(Conf, _Fd, _Max0) ->
658627
Conf.
659628

660629
close_file(undefined) ->
@@ -666,26 +635,11 @@ start_batch(#state{conf = #conf{counter = CRef}} = State) ->
666635
ok = counters:add(CRef, ?C_BATCHES, 1),
667636
State#state{batch = #batch{}}.
668637

669-
670-
post_notify_flush(#state{wal = #wal{fd = Fd},
671-
conf = #conf{write_strategy = sync_after_notify,
672-
sync_method = SyncMeth}}) ->
673-
sync(Fd, SyncMeth);
674-
post_notify_flush(_State) ->
675-
ok.
676-
677638
flush_pending(#state{wal = #wal{fd = Fd},
678639
batch = #batch{pending = Pend},
679-
conf = #conf{write_strategy = WriteStrategy,
680-
sync_method = SyncMeth}} = State0) ->
681-
682-
case WriteStrategy of
683-
default ->
684-
ok = file:write(Fd, Pend),
685-
sync(Fd, SyncMeth);
686-
_ ->
687-
ok = file:write(Fd, Pend)
688-
end,
640+
conf = #conf{sync_method = SyncMeth}} = State0) ->
641+
ok = file:write(Fd, Pend),
642+
sync(Fd, SyncMeth),
689643
State0#state{batch = undefined}.
690644

691645
sync(_Fd, none) ->
@@ -709,7 +663,6 @@ complete_batch(#state{batch = #batch{waiting = Waiting,
709663
Ranges = maps:fold(fun (Pid, BatchWriter, Acc) ->
710664
complete_batch_writer(Pid, BatchWriter, Acc)
711665
end, Wal#wal.ranges, Waiting),
712-
ok = post_notify_flush(State),
713666
State#state{wal = Wal#wal{ranges = Ranges}}.
714667

715668
complete_batch_writer(Pid, #batch_writer{snap_idx = SnapIdx,
@@ -955,7 +908,6 @@ merge_conf_defaults(Conf) ->
955908
recovery_chunk_size => ?WAL_RECOVERY_CHUNK_SIZE,
956909
compute_checksums => true,
957910
pre_allocate => false,
958-
write_strategy => default,
959911
garbage_collect => false,
960912
sync_method => datasync,
961913
min_bin_vheap_size => ?MIN_BIN_VHEAP_SIZE,

src/ra_system.erl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
wal_compute_checksums => boolean(),
3737
wal_max_batch_size => non_neg_integer(),
3838
wal_max_entries => undefined | non_neg_integer(),
39-
wal_write_strategy => default | o_sync | sync_after_notify,
4039
wal_sync_method => datasync | sync | none,
4140
wal_hibernate_after => non_neg_integer(),
4241
wal_garbage_collect => boolean(),
@@ -91,7 +90,6 @@ default_config() ->
9190
WalMaxBatchSize = application:get_env(ra, wal_max_batch_size,
9291
?WAL_DEFAULT_MAX_BATCH_SIZE),
9392
WalMaxEntries = application:get_env(ra, wal_max_entries, undefined),
94-
WalWriteStrategy = application:get_env(ra, wal_write_strategy, default),
9593
WalSyncMethod = application:get_env(ra, wal_sync_method, datasync),
9694
DataDir = ra_env:data_dir(),
9795
WalDataDir = application:get_env(ra, wal_data_dir, DataDir),
@@ -126,7 +124,6 @@ default_config() ->
126124
wal_compute_checksums => WalComputeChecksums,
127125
wal_max_batch_size => WalMaxBatchSize,
128126
wal_max_entries => WalMaxEntries,
129-
wal_write_strategy => WalWriteStrategy,
130127
wal_garbage_collect => WalGarbageCollect,
131128
wal_pre_allocate => WalPreAllocate,
132129
wal_sync_method => WalSyncMethod,

test/ra_log_wal_SUITE.erl

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@ all() ->
1717
[
1818
{group, default},
1919
{group, fsync},
20-
{group, o_sync},
21-
{group, sync_after_notify},
2220
{group, no_sync}
2321
].
2422

@@ -64,8 +62,6 @@ groups() ->
6462
{default, [], all_tests()},
6563
%% uses fsync instead of the default fdatasync
6664
{fsync, [], all_tests()},
67-
{o_sync, [], all_tests()},
68-
{sync_after_notify, [], all_tests()},
6965
{no_sync, [], all_tests()}
7066
].
7167

@@ -83,16 +79,16 @@ init_per_group(Group, Config) ->
8379
ra_directory:init(?SYS),
8480
ra_counters:init(),
8581
% application:ensure_all_started(lg),
86-
{SyncMethod, WriteStrat} =
82+
SyncMethod =
8783
case Group of
8884
fsync ->
89-
{sync, default};
85+
sync;
9086
no_sync ->
91-
{none, default};
87+
none;
9288
_ ->
93-
{datasync, Group}
89+
datasync
9490
end,
95-
[{write_strategy, WriteStrat},
91+
[
9692
{sys_cfg, SysCfg},
9793
{sync_method, SyncMethod} | Config].
9894

@@ -101,10 +97,9 @@ end_per_group(_, Config) ->
10197

10298
init_per_testcase(TestCase, Config) ->
10399
PrivDir = ?config(priv_dir, Config),
104-
G = ?config(write_strategy, Config),
105100
M = ?config(sync_method, Config),
106101
Sys = ?config(sys_cfg, Config),
107-
Dir = filename:join([PrivDir, G, M, TestCase]),
102+
Dir = filename:join([PrivDir, M, TestCase]),
108103
{ok, Ets} = ra_log_ets:start_link(Sys),
109104
ra_counters:init(),
110105
UId = atom_to_binary(TestCase, utf8),
@@ -114,7 +109,6 @@ init_per_testcase(TestCase, Config) ->
114109
WalConf = #{dir => Dir,
115110
name => ra_log_wal,
116111
names => Names,
117-
write_strategy => G,
118112
max_size_bytes => ?MAX_SIZE_BYTES},
119113
_ = ets:new(ra_log_snapshot_state,
120114
[named_table, public, {write_concurrency, true}]),
@@ -758,8 +752,16 @@ sys_get_status(Config) ->
758752
Conf = ?config(wal_conf, Config),
759753
{_UId, _} = ?config(writer_id, Config),
760754
{ok, Pid} = ra_log_wal:start_link(Conf),
761-
{_, _, _, [_, _, _, _, [_, _ ,S]]} = sys:get_status(ra_log_wal),
762-
#{write_strategy := _} = S,
755+
{_, _, _, [_, _, _, _, [_, _ , S]]} = sys:get_status(ra_log_wal),
756+
?assert(is_map(S)),
757+
758+
?assertMatch(#{sync_method := _,
759+
compute_checksums := _,
760+
writers := _,
761+
filename := _,
762+
current_size := _,
763+
max_size_bytes := _,
764+
counters := _ }, S),
763765
proc_lib:stop(Pid),
764766
ok.
765767

0 commit comments

Comments
 (0)