Skip to content

Commit 01cc60f

Browse files
committed
QQ: ensure opened segments are closed after some time of inactivity
Processes that have received messages that had to be read from disk may keep a segment open indefinitely. This introduces a timer which, after some time of inactivity, will close all opened segments to ensure file descriptors are not kept open indefinitely.
1 parent 2dc2cbd commit 01cc60f

File tree

4 files changed

+101
-11
lines changed

4 files changed

+101
-11
lines changed

deps/rabbit/src/rabbit_channel.erl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@
184184
messages_uncommitted,
185185
acks_uncommitted,
186186
pending_raft_commands,
187+
cached_segments,
187188
prefetch_count,
188189
state,
189190
garbage_collection]).
@@ -2287,6 +2288,8 @@ i(acks_uncommitted, #ch{tx = {_Msgs, Acks}}) -> ack_len(Acks);
22872288
i(acks_uncommitted, #ch{}) -> 0;
22882289
i(pending_raft_commands, #ch{queue_states = QS}) ->
22892290
pending_raft_commands(QS);
2291+
i(cached_segments, #ch{queue_states = QS}) ->
2292+
cached_segments(QS);
22902293
i(state, #ch{cfg = #conf{state = running}}) -> credit_flow:state();
22912294
i(state, #ch{cfg = #conf{state = State}}) -> State;
22922295
i(prefetch_count, #ch{cfg = #conf{consumer_prefetch = C}}) -> C;
@@ -2315,6 +2318,17 @@ pending_raft_commands(QStates) ->
23152318
end,
23162319
rabbit_queue_type:fold_state(Fun, 0, QStates).
23172320

2321+
%% Folds over all queue states and sums the `cached_segments' value found in
%% each state's rabbit_queue_type:state_info/1 map. States whose info does not
%% contain that key leave the running total unchanged.
cached_segments(QStates) ->
    rabbit_queue_type:fold_state(
      fun(_QRef, QState, Total) ->
              case rabbit_queue_type:state_info(QState) of
                  #{cached_segments := Count} ->
                      Total + Count;
                  _NoCachedSegments ->
                      Total
              end
      end, 0, QStates).
2331+
23182332
%% Builds the channel's display name as a binary of the form
%% "<connection name> (<channel number>)".
name(#ch{cfg = #conf{conn_name = ConnName, channel = Channel}}) ->
    Formatted = rabbit_misc:format("~ts (~tp)", [ConnName, Channel]),
    list_to_binary(Formatted).
23202334

deps/rabbit/src/rabbit_fifo_client.erl

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
purge/1,
3131
update_machine_state/2,
3232
pending_size/1,
33+
num_cached_segments/1,
3334
stat/1,
3435
stat/2,
3536
query_single_active_consumer/1,
@@ -40,8 +41,12 @@
4041
-define(TIMER_TIME, 10000).
4142
-define(COMMAND_TIMEOUT, 30000).
4243
-define(UNLIMITED_PREFETCH_COUNT, 2000). %% something large for ra
44+
%% controls the timer for closing cached segments
45+
-define(CACHE_SEG_TIMEOUT, 5000).
4346

4447
-type seq() :: non_neg_integer().
48+
-type milliseconds() :: non_neg_integer().
49+
4550

4651
-record(consumer, {key :: rabbit_fifo:consumer_key(),
4752
% status = up :: up | cancelled,
@@ -70,7 +75,10 @@
7075
{term(), rabbit_fifo:command()}},
7176
consumers = #{} :: #{rabbit_types:ctag() => #consumer{}},
7277
timer_state :: term(),
73-
cached_segments :: undefined | ra_flru:state()
78+
%% `undefined' when no segments are cached, otherwise a tuple of:
%%   - the reference of the pending close_cached_segments timer
%%     (or `undefined' if none is set),
%%   - the now_ms() reading taken at the last read that left segments cached,
%%   - the ra_flru cache holding the open segments.
cached_segments :: undefined |
                   {undefined | reference(),
                    LastSeenMs :: milliseconds(),
                    ra_flru:state()}
7482
}).
7583

7684
-opaque state() :: #state{}.
@@ -517,6 +525,15 @@ purge(Server) ->
517525
%% Returns the number of entries currently held in the `pending' map of the
%% client state.
pending_size(#state{} = State) ->
    maps:size(State#state.pending).
519527

528+
%% Returns how many segments are currently held in the client's segment
%% cache: 0 when no cache is present, otherwise the size reported by ra_flru
%% for the cache stored in the third element of the tuple.
-spec num_cached_segments(state()) -> non_neg_integer().
num_cached_segments(#state{cached_segments = undefined}) ->
    0;
num_cached_segments(#state{cached_segments = {_TRef, _LastSeenMs, Flru}}) ->
    ra_flru:size(Flru).
536+
520537
-spec stat(ra:server_id()) ->
521538
{ok, non_neg_integer(), non_neg_integer()}
522539
| {error | timeout, term()}.
@@ -651,24 +668,25 @@ handle_ra_event(_QName, _, {machine, {queue_status, Status}},
651668
#state{} = State) ->
652669
%% just set the queue status
653670
{ok, State#state{queue_status = Status}, []};
654-
handle_ra_event(_QName, Leader, {machine, leader_change},
671+
handle_ra_event(QName, Leader, {machine, leader_change},
655672
#state{leader = OldLeader} = State0) ->
656673
%% we need to update leader
657674
%% and resend any pending commands
658-
rabbit_log:debug("~ts: Detected QQ leader change from ~w to ~w",
659-
[?MODULE, OldLeader, Leader]),
675+
rabbit_log:debug("~ts: ~s Detected QQ leader change from ~w to ~w",
676+
[rabbit_misc:rs(QName), ?MODULE, OldLeader, Leader]),
660677
State = resend_all_pending(State0#state{leader = Leader}),
661678
{ok, State, []};
662679
handle_ra_event(_QName, _From, {rejected, {not_leader, Leader, _Seq}},
663680
#state{leader = Leader} = State) ->
664681
{ok, State, []};
665-
handle_ra_event(_QName, _From, {rejected, {not_leader, Leader, _Seq}},
682+
handle_ra_event(QName, _From, {rejected, {not_leader, Leader, _Seq}},
666683
#state{leader = OldLeader} = State0) ->
667-
rabbit_log:debug("~ts: Detected QQ leader change (rejection) from ~w to ~w",
668-
[?MODULE, OldLeader, Leader]),
684+
rabbit_log:debug("~ts: ~s Detected QQ leader change (rejection) from ~w to ~w",
685+
[rabbit_misc:rs(QName), ?MODULE, OldLeader, Leader]),
669686
State = resend_all_pending(State0#state{leader = Leader}),
670687
{ok, cancel_timer(State), []};
671-
handle_ra_event(_QName, _From, {rejected, {not_leader, _UndefinedMaybe, _Seq}}, State0) ->
688+
handle_ra_event(_QName, _From,
689+
{rejected, {not_leader, _UndefinedMaybe, _Seq}}, State0) ->
672690
% TODO: how should these be handled? re-sent on timer or try random
673691
{ok, State0, []};
674692
handle_ra_event(QName, _, timeout, #state{cfg = #cfg{servers = Servers}} = State0) ->
@@ -680,6 +698,30 @@ handle_ra_event(QName, _, timeout, #state{cfg = #cfg{servers = Servers}} = State
680698
State = resend_all_pending(State0#state{leader = Leader}),
681699
{ok, State, []}
682700
end;
701+
handle_ra_event(QName, Leader, close_cached_segments,
702+
#state{cached_segments = CachedSegments} = State) ->
703+
{ok,
704+
case CachedSegments of
705+
undefined ->
706+
%% timer didn't get cancelled so just ignore this
707+
State;
708+
{_TRef, Last, Cache} ->
709+
case now_ms() > Last + ?CACHE_SEG_TIMEOUT of
710+
true ->
711+
rabbit_log:debug("~ts: closing_cached_segments",
712+
[rabbit_misc:rs(QName)]),
713+
%% its been long enough, evict all
714+
_ = ra_flru:evict_all(Cache),
715+
State#state{cached_segments = undefined};
716+
false ->
717+
%% set another timer
718+
Ref = erlang:send_after(?CACHE_SEG_TIMEOUT, self(),
719+
{'$gen_cast',
720+
{queue_event, QName,
721+
{Leader, close_cached_segments}}}),
722+
State#state{cached_segments = {Ref, Last, Cache}}
723+
end
724+
end, []};
683725
handle_ra_event(_QName, _Leader, {machine, eol}, State) ->
684726
{eol, [{unblock, cluster_name(State)}]}.
685727

@@ -845,8 +887,34 @@ handle_delivery(_QName, _Leader, {delivery, Tag, [_ | _] = IdMsgs},
845887
{State1, Deliveries} = return(Tag, MsgIntIds, State0),
846888
{ok, State1, Deliveries};
847889
handle_delivery(QName, Leader, {delivery, Tag, ReadPlan, Msgs},
848-
#state{cached_segments = Cached0} = State) ->
849-
{MsgIds, Cached} = rabbit_fifo:exec_read(Cached0, ReadPlan, Msgs),
890+
#state{cached_segments = CachedSegments} = State) ->
891+
{TRef, Cached0} = case CachedSegments of
892+
undefined ->
893+
{undefined, undefined};
894+
{R, _, C} ->
895+
{R, C}
896+
end,
897+
{MsgIds, Cached1} = rabbit_fifo:exec_read(Cached0, ReadPlan, Msgs),
898+
%% if there are cached segments after a read and there
899+
%% is no current timer set, set a timer
900+
%% send a message to evict cache after some time
901+
Cached = case ra_flru:size(Cached1) > 0 of
902+
true when TRef == undefined ->
903+
Ref = erlang:send_after(?CACHE_SEG_TIMEOUT, self(),
904+
{'$gen_cast',
905+
{queue_event, QName,
906+
{Leader, close_cached_segments}}}),
907+
{Ref, now_ms(), Cached1};
908+
true ->
909+
{TRef, now_ms(), Cached1};
910+
false ->
911+
if TRef =/= undefined ->
912+
erlang:cancel_timer(TRef, [{async, true}]);
913+
true ->
914+
ok
915+
end,
916+
undefined
917+
end,
850918
handle_delivery(QName, Leader, {delivery, Tag, MsgIds},
851919
State#state{cached_segments = Cached}).
852920

@@ -1017,3 +1085,6 @@ send_pending(Cid, #state{unsent_commands = Unsent} = State0) ->
10171085
normal, S0)
10181086
end, State0, Commands),
10191087
State1#state{unsent_commands = maps:remove(Cid, Unsent)}.
1088+
1089+
%% Returns a millisecond timestamp used to decide whether the cached segments
%% have been idle for longer than ?CACHE_SEG_TIMEOUT.
%%
%% Erlang monotonic time is used rather than system (wall clock) time: the
%% value is only ever compared with another now_ms() reading, and monotonic
%% time never moves backwards, so an OS clock adjustment cannot postpone the
%% closing of cached segments.
%%
%% NB: the returned integer may be negative and is only meaningful relative
%% to another now_ms() reading taken in the same runtime instance.
now_ms() ->
    erlang:monotonic_time(millisecond).

deps/rabbit/src/rabbit_quorum_queue.erl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1094,7 +1094,8 @@ deliver(QSs, Msg0, Options) ->
10941094

10951095

10961096
state_info(S) ->
1097-
#{pending_raft_commands => rabbit_fifo_client:pending_size(S)}.
1097+
#{pending_raft_commands => rabbit_fifo_client:pending_size(S),
1098+
cached_segments => rabbit_fifo_client:num_cached_segments(S)}.
10981099

10991100
-spec infos(rabbit_types:r('queue')) -> rabbit_types:infos().
11001101
infos(QName) ->

deps/rabbitmq_management/priv/www/js/tmpl/channel.ejs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@
6464
<th>Pending Raft commands</th>
6565
<td><%= channel.pending_raft_commands %></td>
6666
</tr>
67+
<tr>
68+
<th>Cached segments</th>
69+
<td><%= channel.cached_segments %></td>
70+
</tr>
6771
</table>
6872

6973
</div>

0 commit comments

Comments
 (0)