Skip to content

Commit 3253325

Browse files
committed
QQ: Use new log_ext effect
This offloads the work of reading messages from on-disk segments to the interacting process rather than doing this blocking, performance affecting work in the ra server process. QQ: ensure opened segments are closed after some time of inactivity Processes that havea received messages that had to be read from disks may keep a segment open indefinitely. This introduces a timer which after some time of inactivity will close all opened segments to ensure file desciptors are not kept open indefinitely.
1 parent 7f584ff commit 3253325

File tree

11 files changed

+286
-109
lines changed

11 files changed

+286
-109
lines changed

MODULE.bazel

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,8 @@ erlang_package.hex_package(
253253
name = "ra",
254254
build_file = "@rabbitmq-server//bazel:BUILD.ra",
255255
pkg = "ra",
256-
sha256 = "1bbb663e0375543f61700793d90ad1ba800be653f0446b135792b4f187e925fe",
257-
version = "2.16.0-pre.3",
256+
sha256 = "a7eae50b0c1c0be4daf9b7ee97be796e6fda372ea6b2047c3aeac89cdc2011df",
257+
version = "2.16.0-pre.10",
258258
)
259259

260260
erlang_package.git_package(

deps/rabbit/src/rabbit_channel.erl

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@
184184
messages_uncommitted,
185185
acks_uncommitted,
186186
pending_raft_commands,
187+
cached_segments,
187188
prefetch_count,
188189
state,
189190
garbage_collection]).
@@ -570,8 +571,6 @@ prioritise_cast(Msg, _Len, _State) ->
570571
case Msg of
571572
{confirm, _MsgSeqNos, _QPid} -> 5;
572573
{reject_publish, _MsgSeqNos, _QPid} -> 5;
573-
{queue_event, _, {confirm, _MsgSeqNos, _QPid}} -> 5;
574-
{queue_event, _, {reject_publish, _MsgSeqNos, _QPid}} -> 5;
575574
_ -> 0
576575
end.
577576

@@ -661,10 +660,14 @@ handle_cast(terminate, State = #ch{cfg = #conf{writer_pid = WriterPid}}) ->
661660
ok = rabbit_writer:flush(WriterPid),
662661
{stop, normal, State};
663662

664-
handle_cast({command, #'basic.consume_ok'{consumer_tag = CTag} = Msg}, State) ->
663+
handle_cast({command, #'basic.consume_ok'{consumer_tag = CTag} = Msg},
664+
#ch{consumer_mapping = CMap} = State)
665+
when is_map_key(CTag, CMap) ->
665666
ok = send(Msg, State),
666667
noreply(consumer_monitor(CTag, State));
667-
668+
handle_cast({command, #'basic.consume_ok'{}}, State) ->
669+
%% a consumer was not found so just ignore this
670+
noreply(State);
668671
handle_cast({command, Msg}, State) ->
669672
ok = send(Msg, State),
670673
noreply(State);
@@ -2281,6 +2284,8 @@ i(acks_uncommitted, #ch{tx = {_Msgs, Acks}}) -> ack_len(Acks);
22812284
i(acks_uncommitted, #ch{}) -> 0;
22822285
i(pending_raft_commands, #ch{queue_states = QS}) ->
22832286
pending_raft_commands(QS);
2287+
i(cached_segments, #ch{queue_states = QS}) ->
2288+
cached_segments(QS);
22842289
i(state, #ch{cfg = #conf{state = running}}) -> credit_flow:state();
22852290
i(state, #ch{cfg = #conf{state = State}}) -> State;
22862291
i(prefetch_count, #ch{cfg = #conf{consumer_prefetch = C}}) -> C;
@@ -2309,6 +2314,17 @@ pending_raft_commands(QStates) ->
23092314
end,
23102315
rabbit_queue_type:fold_state(Fun, 0, QStates).
23112316

2317+
cached_segments(QStates) ->
2318+
Fun = fun(_, V, Acc) ->
2319+
case rabbit_queue_type:state_info(V) of
2320+
#{cached_segments := P} ->
2321+
Acc + P;
2322+
_ ->
2323+
Acc
2324+
end
2325+
end,
2326+
rabbit_queue_type:fold_state(Fun, 0, QStates).
2327+
23122328
name(#ch{cfg = #conf{conn_name = ConnName, channel = Channel}}) ->
23132329
list_to_binary(rabbit_misc:format("~ts (~tp)", [ConnName, Channel])).
23142330

deps/rabbit/src/rabbit_fifo.erl

Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,10 @@
8989
make_purge/0,
9090
make_purge_nodes/1,
9191
make_update_config/1,
92-
make_garbage_collection/0
92+
make_garbage_collection/0,
93+
94+
exec_read/3
95+
9396
]).
9497

9598
-ifdef(TEST).
@@ -2076,30 +2079,27 @@ delivery_effect(ConsumerKey, [{MsgId, ?MSG(Idx, Header)}],
20762079
{CTag, CPid} = consumer_id(ConsumerKey, State),
20772080
{send_msg, CPid, {delivery, CTag, [{MsgId, {Header, RawMsg}}]},
20782081
?DELIVERY_SEND_MSG_OPTS};
2079-
delivery_effect(ConsumerKey, Msgs,
2080-
#?STATE{cfg = #cfg{resource = QR}} = State) ->
2082+
delivery_effect(ConsumerKey, Msgs, #?STATE{} = State) ->
20812083
{CTag, CPid} = consumer_id(ConsumerKey, State),
2082-
{RaftIdxs, Num} = lists:foldr(fun ({_, ?MSG(I, _)}, {Acc, N}) ->
2083-
{[I | Acc], N+1}
2084-
end, {[], 0}, Msgs),
2085-
{log, RaftIdxs,
2086-
fun (Commands)
2087-
when length(Commands) < Num ->
2088-
%% the mandatory length/1 guard is a bit :(
2089-
rabbit_log:info("~ts: requested read consumer tag '~ts' of ~b "
2090-
"indexes ~w but only ~b were returned. "
2091-
"This is most likely a stale read request "
2092-
"and can be ignored",
2093-
[rabbit_misc:rs(QR), CTag, Num, RaftIdxs,
2094-
length(Commands)]),
2095-
[];
2096-
(Commands) ->
2097-
DelMsgs = lists:zipwith(
2098-
fun (Cmd, {MsgId, ?MSG(_Idx, Header)}) ->
2099-
{MsgId, {Header, get_msg(Cmd)}}
2100-
end, Commands, Msgs),
2101-
[{send_msg, CPid, {delivery, CTag, DelMsgs},
2102-
?DELIVERY_SEND_MSG_OPTS}]
2084+
{RaftIdxs, _Num} = lists:foldr(fun ({_, ?MSG(I, _)}, {Acc, N}) ->
2085+
{[I | Acc], N+1}
2086+
end, {[], 0}, Msgs),
2087+
{log_ext, RaftIdxs,
2088+
fun (ReadPlan) ->
2089+
case node(CPid) == node() of
2090+
true ->
2091+
[{send_msg, CPid, {delivery, CTag, ReadPlan, Msgs},
2092+
?DELIVERY_SEND_MSG_OPTS}];
2093+
false ->
2094+
%% if we got there we need to read the data on this node
2095+
%% and send it to the consumer pid as it isn't availble
2096+
%% locally
2097+
{DelMsgs, Flru} = exec_read(undefined, ReadPlan, Msgs),
2098+
%% we need to evict all cached items here
2099+
_ = ra_flru:evict_all(Flru),
2100+
[{send_msg, CPid, {delivery, CTag, DelMsgs},
2101+
?DELIVERY_SEND_MSG_OPTS}]
2102+
end
21032103
end,
21042104
{local, node(CPid)}}.
21052105

@@ -3014,3 +3014,21 @@ incr_msg(Msg0, DelFailed, Anns) ->
30143014
false ->
30153015
Msg2
30163016
end.
3017+
3018+
exec_read(Flru0, ReadPlan, Msgs) ->
3019+
try ra_log_read_plan:execute(ReadPlan, Flru0) of
3020+
{Entries, Flru} ->
3021+
%% return a list in original order
3022+
{lists:map(fun ({MsgId, ?MSG(Idx, Header)}) ->
3023+
Cmd = maps:get(Idx, Entries),
3024+
{MsgId, {Header, get_msg(Cmd)}}
3025+
end, Msgs), Flru}
3026+
catch exit:{missing_key, _}
3027+
when Flru0 =/= undefined ->
3028+
%% this segment has most likely been appended to but the
3029+
%% cached index doesn't know about new items and need to be
3030+
%% re-generated
3031+
_ = ra_flru:evict_all(Flru0),
3032+
%% retry without segment cache
3033+
exec_read(undefined, ReadPlan, Msgs)
3034+
end.

deps/rabbit/src/rabbit_fifo_client.erl

Lines changed: 98 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
purge/1,
3131
update_machine_state/2,
3232
pending_size/1,
33+
num_cached_segments/1,
3334
stat/1,
3435
stat/2,
3536
query_single_active_consumer/1,
@@ -40,8 +41,12 @@
4041
-define(TIMER_TIME, 10000).
4142
-define(COMMAND_TIMEOUT, 30000).
4243
-define(UNLIMITED_PREFETCH_COUNT, 2000). %% something large for ra
44+
%% controls the timer for closing cached segments
45+
-define(CACHE_SEG_TIMEOUT, 5000).
4346

4447
-type seq() :: non_neg_integer().
48+
-type milliseconds() :: non_neg_integer().
49+
4550

4651
-record(consumer, {key :: rabbit_fifo:consumer_key(),
4752
% status = up :: up | cancelled,
@@ -69,7 +74,11 @@
6974
pending = #{} :: #{seq() =>
7075
{term(), rabbit_fifo:command()}},
7176
consumers = #{} :: #{rabbit_types:ctag() => #consumer{}},
72-
timer_state :: term()
77+
timer_state :: term(),
78+
cached_segments :: undefined |
79+
{undefined | reference(),
80+
LastSeenMs :: milliseconds(),
81+
ra_flru:state()}
7382
}).
7483

7584
-opaque state() :: #state{}.
@@ -132,9 +141,15 @@ enqueue(QName, Correlation, Msg,
132141
%% it is safe to reject the message as we never attempted
133142
%% to send it
134143
{reject_publish, State0};
144+
{error, {shutdown, delete}} ->
145+
rabbit_log:debug("~ts: QQ ~ts tried to register enqueuer during delete shutdown",
146+
[?MODULE, rabbit_misc:rs(QName)]),
147+
{reject_publish, State0};
135148
{timeout, _} ->
136149
{reject_publish, State0};
137150
Err ->
151+
rabbit_log:debug("~ts: QQ ~ts error when registering enqueuer ~p",
152+
[?MODULE, rabbit_misc:rs(QName), Err]),
138153
exit(Err)
139154
end;
140155
enqueue(_QName, _Correlation, _Msg,
@@ -167,7 +182,7 @@ enqueue(QName, Correlation, Msg,
167182
%% @param QueueName Name of the queue.
168183
%% @param Msg an arbitrary erlang term representing the message.
169184
%% @param State the current {@module} state.
170-
%% @returns
185+
%% @return's
171186
%% `{ok, State, Actions}' if the command was successfully sent.
172187
%% {@module} assigns a sequence number to every raft command it issues. The
173188
%% SequenceNumber can be correlated to the applied sequence numbers returned
@@ -510,6 +525,15 @@ purge(Server) ->
510525
pending_size(#state{pending = Pend}) ->
511526
maps:size(Pend).
512527

528+
-spec num_cached_segments(state()) -> non_neg_integer().
529+
num_cached_segments(#state{cached_segments = CachedSegments}) ->
530+
case CachedSegments of
531+
undefined ->
532+
0;
533+
{_, _, Cached} ->
534+
ra_flru:size(Cached)
535+
end.
536+
513537
-spec stat(ra:server_id()) ->
514538
{ok, non_neg_integer(), non_neg_integer()}
515539
| {error | timeout, term()}.
@@ -633,7 +657,8 @@ handle_ra_event(QName, From, {applied, Seqs},
633657
_ ->
634658
{ok, State1, Actions}
635659
end;
636-
handle_ra_event(QName, From, {machine, {delivery, _ConsumerTag, _} = Del}, State0) ->
660+
handle_ra_event(QName, From, {machine, Del}, State0)
661+
when element(1, Del) == delivery ->
637662
handle_delivery(QName, From, Del, State0);
638663
handle_ra_event(_QName, _From, {machine, Action}, State)
639664
when element(1, Action) =:= credit_reply orelse
@@ -643,28 +668,31 @@ handle_ra_event(_QName, _, {machine, {queue_status, Status}},
643668
#state{} = State) ->
644669
%% just set the queue status
645670
{ok, State#state{queue_status = Status}, []};
646-
handle_ra_event(_QName, Leader, {machine, leader_change},
671+
handle_ra_event(QName, Leader, {machine, leader_change},
647672
#state{leader = OldLeader,
648673
pending = Pending} = State0) ->
649674
%% we need to update leader
650675
%% and resend any pending commands
651-
rabbit_log:debug("~ts: Detected QQ leader change from ~w to ~w, "
676+
rabbit_log:debug("~ts: ~s Detected QQ leader change from ~w to ~w, "
652677
"resending ~b pending commands",
653-
[?MODULE, OldLeader, Leader, maps:size(Pending)]),
678+
[rabbit_misc:rs(QName), ?MODULE, OldLeader,
679+
Leader, maps:size(Pending)]),
654680
State = resend_all_pending(State0#state{leader = Leader}),
655681
{ok, State, []};
656682
handle_ra_event(_QName, _From, {rejected, {not_leader, Leader, _Seq}},
657683
#state{leader = Leader} = State) ->
658684
{ok, State, []};
659-
handle_ra_event(_QName, _From, {rejected, {not_leader, Leader, _Seq}},
685+
handle_ra_event(QName, _From, {rejected, {not_leader, Leader, _Seq}},
660686
#state{leader = OldLeader,
661687
pending = Pending} = State0) ->
662-
rabbit_log:debug("~ts: Detected QQ leader change (rejection) from ~w to ~w, "
688+
rabbit_log:debug("~ts: ~s Detected QQ leader change (rejection) from ~w to ~w, "
663689
"resending ~b pending commands",
664-
[?MODULE, OldLeader, Leader, maps:size(Pending)]),
690+
[rabbit_misc:rs(QName), ?MODULE, OldLeader,
691+
Leader, maps:size(Pending)]),
665692
State = resend_all_pending(State0#state{leader = Leader}),
666693
{ok, cancel_timer(State), []};
667-
handle_ra_event(_QName, _From, {rejected, {not_leader, _UndefinedMaybe, _Seq}}, State0) ->
694+
handle_ra_event(_QName, _From,
695+
{rejected, {not_leader, _UndefinedMaybe, _Seq}}, State0) ->
668696
% TODO: how should these be handled? re-sent on timer or try random
669697
{ok, State0, []};
670698
handle_ra_event(QName, _, timeout, #state{cfg = #cfg{servers = Servers}} = State0) ->
@@ -676,6 +704,30 @@ handle_ra_event(QName, _, timeout, #state{cfg = #cfg{servers = Servers}} = State
676704
State = resend_all_pending(State0#state{leader = Leader}),
677705
{ok, State, []}
678706
end;
707+
handle_ra_event(QName, Leader, close_cached_segments,
708+
#state{cached_segments = CachedSegments} = State) ->
709+
{ok,
710+
case CachedSegments of
711+
undefined ->
712+
%% timer didn't get cancelled so just ignore this
713+
State;
714+
{_TRef, Last, Cache} ->
715+
case now_ms() > Last + ?CACHE_SEG_TIMEOUT of
716+
true ->
717+
rabbit_log:debug("~ts: closing_cached_segments",
718+
[rabbit_misc:rs(QName)]),
719+
%% its been long enough, evict all
720+
_ = ra_flru:evict_all(Cache),
721+
State#state{cached_segments = undefined};
722+
false ->
723+
%% set another timer
724+
Ref = erlang:send_after(?CACHE_SEG_TIMEOUT, self(),
725+
{'$gen_cast',
726+
{queue_event, QName,
727+
{Leader, close_cached_segments}}}),
728+
State#state{cached_segments = {Ref, Last, Cache}}
729+
end
730+
end, []};
679731
handle_ra_event(_QName, _Leader, {machine, eol}, State) ->
680732
{eol, [{unblock, cluster_name(State)}]}.
681733

@@ -839,7 +891,39 @@ handle_delivery(_QName, _Leader, {delivery, Tag, [_ | _] = IdMsgs},
839891
%% we should return all messages.
840892
MsgIntIds = [Id || {Id, _} <- IdMsgs],
841893
{State1, Deliveries} = return(Tag, MsgIntIds, State0),
842-
{ok, State1, Deliveries}.
894+
{ok, State1, Deliveries};
895+
handle_delivery(QName, Leader, {delivery, Tag, ReadPlan, Msgs},
896+
#state{cached_segments = CachedSegments} = State) ->
897+
{TRef, Cached0} = case CachedSegments of
898+
undefined ->
899+
{undefined, undefined};
900+
{R, _, C} ->
901+
{R, C}
902+
end,
903+
{MsgIds, Cached1} = rabbit_fifo:exec_read(Cached0, ReadPlan, Msgs),
904+
%% if there are cached segments after a read and there
905+
%% is no current timer set, set a timer
906+
%% send a message to evict cache after some time
907+
Cached = case ra_flru:size(Cached1) > 0 of
908+
true when TRef == undefined ->
909+
Ref = erlang:send_after(?CACHE_SEG_TIMEOUT, self(),
910+
{'$gen_cast',
911+
{queue_event, QName,
912+
{Leader, close_cached_segments}}}),
913+
{Ref, now_ms(), Cached1};
914+
true ->
915+
{TRef, now_ms(), Cached1};
916+
false ->
917+
if TRef =/= undefined ->
918+
_ = erlang:cancel_timer(TRef, [{async, true}]),
919+
ok;
920+
true ->
921+
ok
922+
end,
923+
undefined
924+
end,
925+
handle_delivery(QName, Leader, {delivery, Tag, MsgIds},
926+
State#state{cached_segments = Cached}).
843927

844928
transform_msgs(QName, QRef, Msgs) ->
845929
lists:map(
@@ -1008,3 +1092,6 @@ send_pending(Cid, #state{unsent_commands = Unsent} = State0) ->
10081092
normal, S0)
10091093
end, State0, Commands),
10101094
State1#state{unsent_commands = maps:remove(Cid, Unsent)}.
1095+
1096+
now_ms() ->
1097+
erlang:system_time(millisecond).

deps/rabbit/src/rabbit_fifo_dlx.erl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ discard(Msgs, Reason, undefined, State) ->
154154
[Reason, rabbit_quorum_queue, disabled, length(Msgs)]}]};
155155
discard(Msgs0, Reason, {at_most_once, {Mod, Fun, Args}}, State) ->
156156
Idxs = [I || ?MSG(I, _) <- Msgs0],
157+
%% TODO: this could be turned into a log_ext effect instead to avoid
158+
%% reading from disk inside the qq process
157159
Effect = {log, Idxs,
158160
fun (Log) ->
159161
Lookup = maps:from_list(lists:zip(Idxs, Log)),

deps/rabbit/src/rabbit_quorum_queue.erl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1094,7 +1094,8 @@ deliver(QSs, Msg0, Options) ->
10941094

10951095

10961096
state_info(S) ->
1097-
#{pending_raft_commands => rabbit_fifo_client:pending_size(S)}.
1097+
#{pending_raft_commands => rabbit_fifo_client:pending_size(S),
1098+
cached_segments => rabbit_fifo_client:num_cached_segments(S)}.
10981099

10991100
-spec infos(rabbit_types:r('queue')) -> rabbit_types:infos().
11001101
infos(QName) ->

0 commit comments

Comments
 (0)