Skip to content

Commit ee8072c

Browse files
committed
QQ: tweaks to checkpointing for use cases with fewer larger messages.
Lower the min_checkpoint_interval substantially to allow quorum queues better control over when checkpoints are taken. Implement some tweaks to the checkpoint decision logic to be more likely to take a checkpoint for low throughput queues with large messages.
1 parent bb20885 commit ee8072c

File tree

3 files changed

+31
-12
lines changed

3 files changed

+31
-12
lines changed

deps/rabbit/src/rabbit_fifo.erl

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2934,21 +2934,39 @@ do_checkpoints(Ts,
29342934
Smallest
29352935
end,
29362936
MsgsTot = messages_total(MacState),
2937+
#{segments_range := SegRange,
2938+
num_segments := NumSegments} = ra_aux:log_stats(RaAux),
2939+
SegmentsMightBeDeletable = case SegRange of
2940+
undefined ->
2941+
false;
2942+
{S, _E} ->
2943+
NewSmallest > S
2944+
end,
2945+
CanReclaimSegments = NumSegments > 1 andalso
2946+
SegmentsMightBeDeletable,
2947+
29372948
{CheckMinInterval, CheckMinIndexes, CheckMaxIndexes} =
29382949
persistent_term:get(quorum_queue_checkpoint_config,
29392950
{?CHECK_MIN_INTERVAL_MS, ?CHECK_MIN_INDEXES,
29402951
?CHECK_MAX_INDEXES}),
29412952
EnoughTimeHasPassed = TimeSince > CheckMinInterval,
29422953

2943-
%% enough time has passed and enough indexes have been committed
2944-
case (IndexesSince > MinIndexes andalso
2945-
EnoughTimeHasPassed) orelse
2946-
%% the queue is empty and some commands have been
2947-
%% applied since the last checkpoint
2948-
(MsgsTot == 0 andalso
2949-
IndexesSince > CheckMinIndexes andalso
2950-
EnoughTimeHasPassed) orelse
2951-
Force of
2954+
case EnoughTimeHasPassed andalso
2955+
(
2956+
%% condition 1: enough indexes have been committed since the last
2957+
%% checkpoint
2958+
(IndexesSince > MinIndexes) orelse
2959+
%% condition 2: the queue is empty and _some_ commands (more than 64)
2960+
%% have been applied since the last checkpoint
2961+
(MsgsTot == 0 andalso
2962+
IndexesSince > 64) orelse
2963+
%% condition 3: there are segments and the number of entries in
2964+
%% segments is considerably larger than the number of messages
2965+
%% in the queue
2966+
CanReclaimSegments orelse
2967+
%% doing it anyway
2968+
Force
2969+
) of
29522970
true ->
29532971
%% take fewer checkpoints the more messages there are on queue
29542972
NextIndexes = min(max(MsgsTot, CheckMinIndexes), CheckMaxIndexes),

deps/rabbit/src/rabbit_fifo.hrl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@
100100
% represents a partially applied module call
101101

102102
-define(CHECK_MIN_INTERVAL_MS, 1000).
103-
-define(CHECK_MIN_INDEXES, 4096).
103+
-define(CHECK_MIN_INDEXES, 4096 * 2).
104104
-define(CHECK_MAX_INDEXES, 666_667).
105105

106106
-define(USE_AVG_HALF_LIFE, 10000.0).

deps/rabbit/src/rabbit_quorum_queue.erl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,9 @@
145145
-define(DELETE_TIMEOUT, 5000).
146146
-define(MEMBER_CHANGE_TIMEOUT, 20_000).
147147
-define(SNAPSHOT_INTERVAL, 8192). %% the ra default is 4096
148-
% -define(UNLIMITED_PREFETCH_COUNT, 2000). %% something large for ra
149-
-define(MIN_CHECKPOINT_INTERVAL, 8192). %% the ra default is 16384
148+
%% setting a low default here to allow quorum queues to better choose for themselves
149+
%% when to take a checkpoint
150+
-define(MIN_CHECKPOINT_INTERVAL, 64).
150151
-define(LEADER_HEALTH_CHECK_TIMEOUT, 5_000).
151152
-define(GLOBAL_LEADER_HEALTH_CHECK_TIMEOUT, 60_000).
152153

0 commit comments

Comments
 (0)