
Commit ae9a521

Author: Tim Watson
Commit message: merging heads
2 parents: 4282f62 + 32f7796
8 files changed: +113 / -75 lines

src/rabbit_amqqueue_process.erl

Lines changed: 38 additions & 22 deletions

@@ -47,6 +47,7 @@
             msg_id_to_channel,
             ttl,
             ttl_timer_ref,
+            ttl_timer_expiry,
             senders,
             publish_seqno,
             unconfirmed,
@@ -559,7 +560,8 @@ deliver_or_enqueue(Delivery = #delivery{message = Message,
             maybe_record_confirm_message(Confirm, State1),
             Props = message_properties(Confirm, State2),
             BQS1 = BQ:publish(Message, Props, SenderPid, BQS),
-            ensure_ttl_timer(State2#q{backing_queue_state = BQS1})
+            ensure_ttl_timer(Props#message_properties.expiry,
+                             State2#q{backing_queue_state = BQS1})
     end.

 requeue_and_run(AckTags, State = #q{backing_queue = BQ}) ->
@@ -699,28 +701,42 @@ drop_expired_messages(State = #q{backing_queue_state = BQS,
                                  backing_queue       = BQ }) ->
     Now = now_micros(),
     DLXFun = dead_letter_fun(expired, State),
-    ExpirePred = fun (#message_properties{expiry = Expiry}) -> Now > Expiry end,
-    case DLXFun of
-        undefined -> {undefined, BQS1} = BQ:dropwhile(ExpirePred, false, BQS),
-                     BQS1;
-        _         -> {Msgs, BQS1} = BQ:dropwhile(ExpirePred, true, BQS),
-                     lists:foreach(
-                       fun({Msg, AckTag}) -> DLXFun(Msg, AckTag) end, Msgs),
-                     BQS1
-    end,
-    ensure_ttl_timer(State#q{backing_queue_state = BQS1}).
-
-ensure_ttl_timer(State = #q{backing_queue       = BQ,
-                            backing_queue_state = BQS,
-                            ttl                 = TTL,
-                            ttl_timer_ref       = undefined})
-  when TTL =/= undefined ->
-    case BQ:is_empty(BQS) of
-        true  -> State;
-        false -> TRef = erlang:send_after(TTL, self(), drop_expired),
-                 State#q{ttl_timer_ref = TRef}
+    ExpirePred = fun (#message_properties{expiry = Exp}) -> Now >= Exp end,
+    {Props, BQS1} =
+        case DLXFun of
+            undefined ->
+                {Next, undefined, BQS2} = BQ:dropwhile(ExpirePred, false, BQS),
+                {Next, BQS2};
+            _ ->
+                {Next, Msgs, BQS2} = BQ:dropwhile(ExpirePred, true, BQS),
+                lists:foreach(fun({Msg, AckTag}) -> DLXFun(Msg, AckTag) end,
+                              Msgs),
+                {Next, BQS2}
+        end,
+    ensure_ttl_timer(case Props of
+                         undefined                         -> undefined;
+                         #message_properties{expiry = Exp} -> Exp
+                     end, State#q{backing_queue_state = BQS1}).
+
+ensure_ttl_timer(undefined, State) ->
+    State;
+ensure_ttl_timer(_Expiry, State = #q{ttl = undefined}) ->
+    State;
+ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref = undefined}) ->
+    After = (case Expiry - now_micros() of
+                 V when V > 0 -> V + 999; %% always fire later
+                 _            -> 0
+             end) div 1000,
+    TRef = erlang:send_after(After, self(), drop_expired),
+    State#q{ttl_timer_ref = TRef, ttl_timer_expiry = Expiry};
+ensure_ttl_timer(Expiry, State = #q{ttl_timer_ref    = TRef,
+                                    ttl_timer_expiry = TExpiry})
+  when Expiry + 1000 < TExpiry ->
+    case erlang:cancel_timer(TRef) of
+        false -> State;
+        _     -> ensure_ttl_timer(Expiry, State#q{ttl_timer_ref = undefined})
     end;
-ensure_ttl_timer(State) ->
+ensure_ttl_timer(_Expiry, State) ->
     State.

 ack_if_no_dlx(AckTags, State = #q{dlx = undefined,
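Note (reviewer sketch, not part of the commit): the new ensure_ttl_timer/2 schedules the timer from an absolute per-message expiry in microseconds, while erlang:send_after/3 takes milliseconds. The arithmetic from the third clause, pulled out into a hypothetical helper for illustration:

    %% Expiry and Now are in microseconds; erlang:send_after/3 wants
    %% milliseconds. Adding 999 before the integer division rounds up, so
    %% the timer never fires before the message has actually expired.
    expiry_to_delay(Expiry, Now) ->
        (case Expiry - Now of
             V when V > 0 -> V + 999;
             _            -> 0
         end) div 1000.
    %% e.g. an expiry 1500us away gives (1500 + 999) div 1000 = 2ms.

The guarded clause only cancels and reschedules when the new expiry beats the pending one by more than 1ms (Expiry + 1000 < TExpiry), which avoids churning the timer on every publish.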

src/rabbit_backing_queue.erl

Lines changed: 4 additions & 2 deletions

@@ -123,9 +123,11 @@
 %% necessitate an ack or not. If they do, the function returns a list of
 %% messages with the respective acktags.
 -callback dropwhile(msg_pred(), true, state())
-                   -> {[{rabbit_types:basic_message(), ack()}], state()};
+                   -> {rabbit_types:message_properties() | undefined,
+                       [{rabbit_types:basic_message(), ack()}], state()};
                    (msg_pred(), false, state())
-                   -> {undefined, state()}.
+                   -> {rabbit_types:message_properties() | undefined,
+                       undefined, state()}.

 %% Produce the next message.
 -callback fetch(true, state()) -> {fetch_result(ack()), state()};
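For orientation, a hedged sketch of how a caller consumes the widened dropwhile/3 contract; Pred, BQ and BQS are placeholders here. The new first tuple element is the message_properties of the first message the predicate rejects, or undefined if the queue was drained:

    %% AckRequired = true: dropped messages come back with their ack tags.
    {NextProps, Msgs, BQS1} = BQ:dropwhile(Pred, true, BQS),
    %% Msgs :: [{rabbit_types:basic_message(), ack()}]

    %% AckRequired = false: no messages are returned, only the next properties.
    {NextProps1, undefined, BQS2} = BQ:dropwhile(Pred, false, BQS1),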

src/rabbit_backing_queue_qc.erl

Lines changed: 1 addition & 1 deletion

@@ -268,7 +268,7 @@ next_state(S, Res, {call, ?BQMOD, drain_confirmed, _Args}) ->
     S#state{bqstate = BQ1};

 next_state(S, Res, {call, ?BQMOD, dropwhile, _Args}) ->
-    BQ = {call, erlang, element, [2, Res]},
+    BQ = {call, erlang, element, [3, Res]},
     #state{messages = Messages} = S,
     Msgs1 = drop_messages(Messages),
     S#state{bqstate = BQ, len = gb_trees:size(Msgs1), messages = Msgs1};

src/rabbit_mirror_queue_master.erl

Lines changed: 3 additions & 3 deletions

@@ -185,13 +185,13 @@ dropwhile(Pred, AckRequired,
                               set_delivered       = SetDelivered,
                               backing_queue_state = BQS }) ->
     Len = BQ:len(BQS),
-    {Msgs, BQS1} = BQ:dropwhile(Pred, AckRequired, BQS),
+    {Next, Msgs, BQS1} = BQ:dropwhile(Pred, AckRequired, BQS),
     Len1 = BQ:len(BQS1),
     ok = gm:broadcast(GM, {set_length, Len1, AckRequired}),
     Dropped = Len - Len1,
     SetDelivered1 = lists:max([0, SetDelivered - Dropped]),
-    {Msgs, State #state { backing_queue_state = BQS1,
-                          set_delivered       = SetDelivered1 } }.
+    {Next, Msgs, State #state { backing_queue_state = BQS1,
+                                set_delivered       = SetDelivered1 } }.

 drain_confirmed(State = #state { backing_queue       = BQ,
                                  backing_queue_state = BQS,

src/rabbit_mnesia.erl

Lines changed: 14 additions & 12 deletions

@@ -726,40 +726,42 @@ reset(Force) ->
              end]),
     ensure_mnesia_not_running(),
     case not Force andalso is_clustered() andalso
-             is_only_disc_node(node(), false)
+         is_only_disc_node(node(), false)
     of
         true  -> log_both("no other disc nodes running");
         false -> ok
     end,
-    Node  = node(),
-    Nodes = all_clustered_nodes() -- [Node],
     case Force of
-        true  -> ok;
+        true ->
+            disconnect_nodes(nodes());
         false ->
            ensure_mnesia_dir(),
            start_mnesia(),
-            RunningNodes =
+            {Nodes, RunningNodes} =
                try
                    %% Force=true here so that reset still works when clustered
                    %% with a node which is down
                    ok = init_db(read_cluster_nodes_config(), true),
-                    running_clustered_nodes() -- [Node]
+                    {all_clustered_nodes() -- [node()],
+                     running_clustered_nodes() -- [node()]}
                after
                    stop_mnesia()
                end,
            leave_cluster(Nodes, RunningNodes),
-            rabbit_misc:ensure_ok(mnesia:delete_schema([Node]),
-                                  cannot_delete_schema)
+            rabbit_misc:ensure_ok(mnesia:delete_schema([node()]),
+                                  cannot_delete_schema),
+            disconnect_nodes(Nodes)
     end,
-    %% We need to make sure that we don't end up in a distributed
-    %% Erlang system with nodes while not being in an Mnesia cluster
-    %% with them. We don't handle that well.
-    [erlang:disconnect_node(N) || N <- Nodes],
     ok = delete_cluster_nodes_config(),
     %% remove persisted messages and any other garbage we find
     ok = rabbit_file:recursive_delete(filelib:wildcard(dir() ++ "/*")),
     ok.

+%% We need to make sure that we don't end up in a distributed Erlang
+%% system with nodes while not being in an Mnesia cluster with
+%% them. We don't handle that well.
+disconnect_nodes(Nodes) -> [erlang:disconnect_node(N) || N <- Nodes].
+
 leave_cluster([], _) -> ok;
 leave_cluster(Nodes, RunningNodes) ->
     %% find at least one running cluster node and instruct it to

src/rabbit_tests.erl

Lines changed: 44 additions & 28 deletions

@@ -72,12 +72,10 @@ maybe_run_cluster_dependent_tests() ->
 run_cluster_dependent_tests(SecondaryNode) ->
     SecondaryNodeS = atom_to_list(SecondaryNode),

-    cover:stop(SecondaryNode),
     ok = control_action(stop_app, []),
-    ok = control_action(reset, []),
+    ok = safe_reset(),
     ok = control_action(cluster, [SecondaryNodeS]),
     ok = control_action(start_app, []),
-    cover:start(SecondaryNode),
     ok = control_action(start_app, SecondaryNode, [], []),

     io:format("Running cluster dependent tests with node ~p~n", [SecondaryNode]),
@@ -908,7 +906,7 @@ test_cluster_management2(SecondaryNode) ->
     ok = assert_ram_node(),

     %% join cluster as a ram node
-    ok = control_action(reset, []),
+    ok = safe_reset(),
     ok = control_action(force_cluster, [SecondaryNodeS, "invalid1@invalid"]),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
@@ -965,29 +963,30 @@ test_cluster_management2(SecondaryNode) ->
     ok = assert_disc_node(),

     %% turn a disk node into a ram node
-    ok = control_action(reset, []),
+    %%
+    %% can't use safe_reset here since for some reason nodes()==[] and
+    %% yet w/o stopping coverage things break
+    with_suspended_cover(
+      [SecondaryNode], fun () -> ok = control_action(reset, []) end),
     ok = control_action(cluster, [SecondaryNodeS]),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
     ok = assert_ram_node(),

     %% NB: this will log an inconsistent_database error, which is harmless
-    %% Turning cover on / off is OK even if we're not in general using cover,
-    %% it just turns the engine on / off, doesn't actually log anything.
-    cover:stop([SecondaryNode]),
-    true = disconnect_node(SecondaryNode),
-    pong = net_adm:ping(SecondaryNode),
-    cover:start([SecondaryNode]),
+    with_suspended_cover(
+      [SecondaryNode], fun () ->
+                               true = disconnect_node(SecondaryNode),
+                               pong = net_adm:ping(SecondaryNode)
+                       end),

     %% leaving a cluster as a ram node
-    ok = control_action(reset, []),
+    ok = safe_reset(),
     %% ...and as a disk node
     ok = control_action(cluster, [SecondaryNodeS, NodeS]),
     ok = control_action(start_app, []),
     ok = control_action(stop_app, []),
-    cover:stop(SecondaryNode),
-    ok = control_action(reset, []),
-    cover:start(SecondaryNode),
+    ok = safe_reset(),

     %% attempt to leave cluster when no other node is alive
     ok = control_action(cluster, [SecondaryNodeS, NodeS]),
@@ -1002,22 +1001,39 @@ test_cluster_management2(SecondaryNode) ->
     control_action(cluster, [SecondaryNodeS]),

     %% leave system clustered, with the secondary node as a ram node
-    ok = control_action(force_reset, []),
+    with_suspended_cover(
+      [SecondaryNode], fun () -> ok = control_action(force_reset, []) end),
     ok = control_action(start_app, []),
     %% Yes, this is rather ugly. But since we're a clustered Mnesia
     %% node and we're telling another clustered node to reset itself,
     %% we will get disconnected half way through causing a
     %% badrpc. This never happens in real life since rabbitmqctl is
-    %% not a clustered Mnesia node.
-    cover:stop(SecondaryNode),
-    {badrpc, nodedown} = control_action(force_reset, SecondaryNode, [], []),
-    pong = net_adm:ping(SecondaryNode),
-    cover:start(SecondaryNode),
+    %% not a clustered Mnesia node and is a hidden node.
+    with_suspended_cover(
+      [SecondaryNode],
+      fun () ->
+              {badrpc, nodedown} =
+                  control_action(force_reset, SecondaryNode, [], []),
+              pong = net_adm:ping(SecondaryNode)
+      end),
     ok = control_action(cluster, SecondaryNode, [NodeS], []),
     ok = control_action(start_app, SecondaryNode, [], []),

     passed.

+%% 'cover' does not cope at all well with nodes disconnecting, which
+%% happens as part of reset. So we turn it off temporarily. That is ok
+%% even if we're not in general using cover, it just turns the engine
+%% on / off and doesn't log anything.
+safe_reset() -> with_suspended_cover(
+                  nodes(), fun () -> control_action(reset, []) end).
+
+with_suspended_cover(Nodes, Fun) ->
+    cover:stop(Nodes),
+    Res = Fun(),
+    cover:start(Nodes),
+    Res.
+
 test_user_management() ->

     %% lots if stuff that should fail
@@ -2388,10 +2404,10 @@ test_dropwhile(VQ0) ->
            fun (N, Props) -> Props#message_properties{expiry = N} end, VQ0),

     %% drop the first 5 messages
-    {undefined, VQ2} = rabbit_variable_queue:dropwhile(
-                         fun(#message_properties { expiry = Expiry }) ->
-                                 Expiry =< 5
-                         end, false, VQ1),
+    {_, undefined, VQ2} = rabbit_variable_queue:dropwhile(
+                            fun(#message_properties { expiry = Expiry }) ->
+                                    Expiry =< 5
+                            end, false, VQ1),

     %% fetch five now
     VQ3 = lists:foldl(fun (_N, VQN) ->
@@ -2408,11 +2424,11 @@ test_dropwhile(VQ0) ->
 test_dropwhile_varying_ram_duration(VQ0) ->
     VQ1 = variable_queue_publish(false, 1, VQ0),
     VQ2 = rabbit_variable_queue:set_ram_duration_target(0, VQ1),
-    {undefined, VQ3} = rabbit_variable_queue:dropwhile(
-                         fun(_) -> false end, false, VQ2),
+    {_, undefined, VQ3} = rabbit_variable_queue:dropwhile(
+                            fun(_) -> false end, false, VQ2),
     VQ4 = rabbit_variable_queue:set_ram_duration_target(infinity, VQ3),
     VQ5 = variable_queue_publish(false, 1, VQ4),
-    {undefined, VQ6} =
+    {_, undefined, VQ6} =
         rabbit_variable_queue:dropwhile(fun(_) -> false end, false, VQ5),
     VQ6.
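The call sites above all follow the same pattern; as a generic sketch (SomeNodes and some_action/0 are hypothetical names), with_suspended_cover/2 brackets any action that may disconnect the given nodes:

    %% Suspend cover on the nodes, run the action, resume cover, and hand
    %% back the action's result.
    Result = with_suspended_cover(SomeNodes, fun () -> some_action() end).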

src/rabbit_variable_queue.erl

Lines changed: 4 additions & 4 deletions

@@ -581,12 +581,12 @@ drain_confirmed(State = #vqstate { confirmed = C }) ->
 dropwhile(Pred, AckRequired, State) -> dropwhile(Pred, AckRequired, State, []).

 dropwhile(Pred, AckRequired, State, Msgs) ->
-    End = fun(S) when AckRequired -> {lists:reverse(Msgs), S};
-             (S)                  -> {undefined, S}
+    End = fun(Next, S) when AckRequired -> {Next, lists:reverse(Msgs), S};
+             (Next, S)                  -> {Next, undefined, S}
          end,
     case queue_out(State) of
         {empty, State1} ->
-            End(a(State1));
+            End(undefined, a(State1));
         {{value, MsgStatus = #msg_status { msg_props = MsgProps }}, State1} ->
             case {Pred(MsgProps), AckRequired} of
                 {true, true} ->
@@ -598,7 +598,7 @@ dropwhile(Pred, AckRequired, State, Msgs) ->
                 {_, State2} = internal_fetch(false, MsgStatus, State1),
                 dropwhile(Pred, AckRequired, State2, undefined);
             {false, _} ->
-                End(a(in_r(MsgStatus, State1)))
+                End(MsgProps, a(in_r(MsgStatus, State1)))
             end
     end.
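Restating the reshaped End closure as a plain function may make the two return shapes easier to see (end_result/4 is a hypothetical name, not part of the commit; Next is the properties of the first message the predicate rejected, or undefined when the queue ran dry):

    end_result(Next, Msgs, S, true)   -> {Next, lists:reverse(Msgs), S};
    end_result(Next, _Msgs, S, false) -> {Next, undefined, S}.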

src/vm_memory_monitor.erl

Lines changed: 5 additions & 3 deletions

@@ -49,6 +49,7 @@
 -record(state, {total_memory,
                 memory_limit,
+                memory_fraction,
                 timeout,
                 timer,
                 alarmed
@@ -110,7 +111,7 @@ init([MemFraction]) ->
     {ok, set_mem_limits(State, MemFraction)}.

 handle_call(get_vm_memory_high_watermark, _From, State) ->
-    {reply, State#state.memory_limit / State#state.total_memory, State};
+    {reply, State#state.memory_fraction, State};

 handle_call({set_vm_memory_high_watermark, MemFraction}, _From, State) ->
     State1 = set_mem_limits(State, MemFraction),
@@ -171,8 +172,9 @@ set_mem_limits(State, MemFraction) ->
     MemLim = get_mem_limit(MemFraction, TotalMemory),
     error_logger:info_msg("Memory limit set to ~pMB of ~pMB total.~n",
                           [trunc(MemLim/?ONE_MB), trunc(TotalMemory/?ONE_MB)]),
-    internal_update(State #state { total_memory = TotalMemory,
-                                   memory_limit = MemLim }).
+    internal_update(State #state { total_memory    = TotalMemory,
+                                   memory_limit    = MemLim,
+                                   memory_fraction = MemFraction}).

 internal_update(State = #state { memory_limit = MemLimit,
                                  alarmed      = Alarmed}) ->
