Skip to content

Commit 9aef97e

Browse files
hsun324 and meta-codesync[bot]
authored and committed
Remove bulk logs catchup
Summary: Bulk logs catchup generally causes more problems with its aggressive strategy than it solves. Rather, regular logs replication should be tuned to allow for a healthy rate of catchup during normal operation. Reviewed By: SarahDesouky Differential Revision: D90337625 Privacy Context Container: L1384697 fbshipit-source-id: 0d2cf385e9fec1f6251cfbce6c387f6fe8850a85
1 parent beb5a93 commit 9aef97e

File tree

5 files changed

+58
-409
lines changed

5 files changed

+58
-409
lines changed

include/wa_raft.hrl

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@
2828
-define(RAFT_ACCEPTOR_NAME(Table, Partition), (wa_raft_acceptor:registered_name(Table, Partition))).
2929
%% Registered name of the RAFT log server for a RAFT partition
3030
-define(RAFT_LOG_NAME(Table, Partition), (wa_raft_log:registered_name(Table, Partition))).
31-
%% Registered name of the RAFT log catchup server for a RAFT partition
32-
-define(RAFT_LOG_CATCHUP_NAME(Table, Partition), (wa_raft_log_catchup:registered_name(Table, Partition))).
3331
%% Registered name of the RAFT server for a RAFT partition
3432
-define(RAFT_SERVER_NAME(Table, Partition), (wa_raft_server:registered_name(Table, Partition))).
3533
%% Registered name of the RAFT storage server for a RAFT partition
@@ -102,13 +100,11 @@
102100
%% Default call timeout for storage related operation (we need bigger default since storage can be slower)
103101
-define(RAFT_STORAGE_CALL_TIMEOUT(), ?RAFT_CONFIG(raft_storage_call_timeout, 60000)).
104102

105-
%% Maximum number of concurrent catchups by bulk log transfer
106-
-define(RAFT_MAX_CONCURRENT_LOG_CATCHUP(), ?RAFT_CONFIG(raft_max_log_catchup, 5)).
107-
%% Maximum number of concurrent catchups by snapshot transfer
103+
%% Maximum number of concurrent outgoing snapshot transfers initiated by leaders.
108104
-define(RAFT_MAX_CONCURRENT_SNAPSHOT_CATCHUP(), ?RAFT_CONFIG(raft_max_snapshot_catchup, 5)).
109-
%% Maximum number of incoming snapshots by snapshot transfer.
105+
%% Maximum number of concurrent incoming snapshot transfers.
110106
-define(RAFT_MAX_CONCURRENT_INCOMING_SNAPSHOT_TRANSFERS(), ?RAFT_CONFIG(raft_max_incoming_snapshot_transfers, 5)).
111-
%% Maximum number of incoming witness snapshots by snapshot transfer.
107+
%% Maximum number of concurrent incoming snapshot transfers of witness snapshots.
112108
-define(RAFT_MAX_CONCURRENT_INCOMING_WITNESS_SNAPSHOT_TRANSFERS(), ?RAFT_CONFIG(raft_max_incoming_witness_snapshot_transfers, 10)).
113109

114110
%% Default cross-node call timeout for heartbeats made for bulk logs catchup
@@ -260,27 +256,21 @@
260256
(?RAFT_APP_CONFIG(App, ?RAFT_LOG_HEARTBEAT_BINARY_ENTRIES, false) =:= true)
261257
).
262258

263-
%% Minimum number of log entries after which RAFT servers should use bulk logs catchup to bring peers
264-
%% back into sync if enabled.
265-
-define(RAFT_CATCHUP_BULK_LOG_THRESHOLD, raft_catchup_threshold).
266-
-define(RAFT_CATCHUP_BULK_LOG_THRESHOLD(App), ?RAFT_APP_CONFIG(App, {?RAFT_CATCHUP_BULK_LOG_THRESHOLD, catchup_max_follower_lag}, 50000)).
267-
%% Minimum number of unapplied log entries after which RAFT servers should use snapshot catchup to bring peers
268-
%% back into sync if enabled.
269-
-define(RAFT_CATCHUP_APPLY_BACKLOG_THRESHOLD, raft_catchup_apply_backlog_threshold).
270-
-define(RAFT_CATCHUP_APPLY_BACKLOG_THRESHOLD(App), ?RAFT_APP_CONFIG(App, {?RAFT_CATCHUP_APPLY_BACKLOG_THRESHOLD, catchup_max_follower_apply_backlog}, 100000)).
271-
%% Maximum log entries per heartbeat for catchup by bulk log transfer
272-
-define(RAFT_CATCHUP_MAX_ENTRIES_PER_BATCH, raft_catchup_log_batch_entries).
273-
-define(RAFT_CATCHUP_MAX_ENTRIES_PER_BATCH(App), ?RAFT_APP_CONFIG(App, ?RAFT_CATCHUP_MAX_ENTRIES_PER_BATCH, 800)).
274-
%% Maximum bytes per heartbeat for catchup by bulk log transfer
275-
-define(RAFT_CATCHUP_MAX_BYTES_PER_BATCH, raft_catchup_log_batch_bytes).
276-
-define(RAFT_CATCHUP_MAX_BYTES_PER_BATCH(App), ?RAFT_APP_CONFIG(App, ?RAFT_CATCHUP_MAX_BYTES_PER_BATCH, 4 * 1024 * 1024)).
277-
% Time to wait before retrying snapshot transport to a overloaded peer.
278-
-define(RAFT_SNAPSHOT_CATCHUP_OVERLOADED_BACKOFF_MS, snapshot_catchup_overloaded_backoff_ms).
259+
%% The number of log entries that have yet to be applied on a follower after
260+
%% which leaders should send a storage snapshot in lieu of continuing regular
261+
%% replication using log entries in heartbeats.
262+
-define(RAFT_SNAPSHOT_CATCHUP_THRESHOLD, raft_snapshot_catchup_threshold).
263+
-define(RAFT_SNAPSHOT_CATCHUP_THRESHOLD(App), ?RAFT_APP_CONFIG(App, ?RAFT_SNAPSHOT_CATCHUP_THRESHOLD, 100000)).
264+
%% Number of milliseconds to wait before attempting to send a new storage snapshot
265+
%% to a follower that previously rejected a snapshot due to being overloaded.
266+
-define(RAFT_SNAPSHOT_CATCHUP_OVERLOADED_BACKOFF_MS, raft_snapshot_catchup_overloaded_backoff_ms).
279267
-define(RAFT_SNAPSHOT_CATCHUP_OVERLOADED_BACKOFF_MS(App), ?RAFT_APP_CONFIG(App, ?RAFT_SNAPSHOT_CATCHUP_OVERLOADED_BACKOFF_MS, 1000)).
280-
% Time to wait before allowing a rerun of a completed snapshot transport.
268+
%% Number of milliseconds to wait before attempting to send a new storage snapshot
269+
%% to a follower that previously successfully received a storage snapshot.
281270
-define(RAFT_SNAPSHOT_CATCHUP_COMPLETED_BACKOFF_MS, raft_snapshot_catchup_completed_backoff_ms).
282271
-define(RAFT_SNAPSHOT_CATCHUP_COMPLETED_BACKOFF_MS(App), ?RAFT_APP_CONFIG(App, ?RAFT_SNAPSHOT_CATCHUP_COMPLETED_BACKOFF_MS, 20 * 1000)).
283-
% Time to wait before allowing a rerun of a failed snapshot transport.
272+
%% Number of milliseconds to wait before attempting to send a new storage snapshot
273+
%% to a follower that previously failed to receive a storage snapshot.
284274
-define(RAFT_SNAPSHOT_CATCHUP_FAILED_BACKOFF_MS, raft_snapshot_catchup_failed_backoff_ms).
285275
-define(RAFT_SNAPSHOT_CATCHUP_FAILED_BACKOFF_MS(App), ?RAFT_APP_CONFIG(App, ?RAFT_SNAPSHOT_CATCHUP_FAILED_BACKOFF_MS, 10 * 1000)).
286276

@@ -378,9 +368,6 @@
378368
log_name :: atom(),
379369
log_module :: module(),
380370

381-
% Log catchup options
382-
log_catchup_name :: atom(),
383-
384371
% Queue options
385372
queue_name :: atom(),
386373
queue_counters :: atomics:atomics_ref(),
@@ -435,8 +422,6 @@
435422

436423
%% Name of this RAFT replica's storage server
437424
storage :: atom(),
438-
%% Name of this RAFT replica's catchup server
439-
catchup :: atom(),
440425

441426
%% The index of the latest log entry in the local log that is known to
442427
%% match the log entries committed by the cluster

src/wa_raft_app_sup.erl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ init(_) ->
3838
% Setup tables used by shared services.
3939
wa_raft_info:init_tables(),
4040
wa_raft_transport:setup_tables(),
41-
wa_raft_log_catchup:init_tables(),
4241

4342
% Configure startup of shared services.
4443
ChildSpecs = [

src/wa_raft_log_catchup.erl

Lines changed: 0 additions & 306 deletions
This file was deleted.

0 commit comments

Comments
 (0)