Skip to content

Commit 746373a

Browse files
authored
Merge pull request #7534 from rabbitmq/fix-rabbit_db_cluster-members-for-ram-nodes
rabbit_db_cluster: Figure out members list even if Mnesia is stopped
2 parents 37381a1 + 3546556 commit 746373a

File tree

3 files changed

+51
-11
lines changed

3 files changed

+51
-11
lines changed

deps/rabbit/src/rabbit_db_cluster.erl

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
-module(rabbit_db_cluster).
99

1010
-include_lib("kernel/include/logger.hrl").
11-
-include_lib("stdlib/include/assert.hrl").
1211

1312
-include_lib("rabbit_common/include/logging.hrl").
1413

@@ -103,7 +102,25 @@ members() ->
103102
members_using_mnesia().
104103

105104
members_using_mnesia() ->
106-
mnesia:system_info(db_nodes).
105+
case rabbit_mnesia:is_running() andalso rabbit_table:is_present() of
106+
true ->
107+
%% If Mnesia is running locally and some tables exist, we know the
108+
%% database was initialized, so we can query it for the list of
109+
%% members.
110+
mnesia:system_info(db_nodes);
111+
false ->
112+
try
113+
%% When Mnesia is not running, we fall back to reading the
114+
%% cluster status files stored on disk, if they exist.
115+
{Members, _, _} = rabbit_node_monitor:read_cluster_status(),
116+
Members
117+
catch
118+
throw:{error, _Reason}:_Stacktrace ->
119+
%% If we couldn't read those files, we consider that only
120+
%% this node is part of the "cluster".
121+
[node()]
122+
end
123+
end.
107124

108125
-spec disc_members() -> Members when
109126
Members :: [node()].

deps/rabbit/src/rabbit_mnesia.erl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
%% Various queries to get the status of the db
2121
status/0,
22+
is_running/0,
2223
is_clustered/0,
2324
on_running_node/1,
2425
is_process_alive/1,

deps/rabbit/test/clustering_management_SUITE.erl

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,8 @@ change_cluster_when_node_offline(Config) ->
526526
assert_cluster_status({[Bunny], [Bunny], []}, [Bunny]),
527527
assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Hare]}, [Hare]),
528528
assert_cluster_status(
529-
{[Rabbit, Hare, Bunny], [Rabbit, Hare, Bunny], [Hare, Bunny]}, [Rabbit]),
529+
{[Rabbit, Hare, Bunny], [Hare], [Rabbit, Hare, Bunny],
530+
[Rabbit, Hare, Bunny], [Hare, Bunny]}, [Rabbit]),
530531

531532
%% Bring Rabbit back up
532533
ok = start_app(Rabbit),
@@ -756,8 +757,17 @@ pid_from_file(PidFile) ->
756757
cluster_members(Config) ->
757758
rabbit_ct_broker_helpers:get_node_configs(Config, nodename).
758759

760+
assert_cluster_status({All, Disc, Running}, Nodes) ->
761+
assert_cluster_status({All, Running, All, Disc, Running}, Nodes);
759762
assert_cluster_status(Status0, Nodes) ->
760-
Status = {AllNodes, _, _} = sort_cluster_status(Status0),
763+
Status = sort_cluster_status(Status0),
764+
AllNodes = case Status of
765+
{undef, undef, All, _, _} ->
766+
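%% Support mixed-version clusters: when the rabbit_nodes queries are
%% undefined on the remote node, their results are reported as
%% `undef`, so fall back to the Mnesia-based list of all members.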
767+
All;
768+
{All, _, _, _, _} ->
769+
All
770+
end,
761771
wait_for_cluster_status(Status, AllNodes, Nodes).
762772

763773
wait_for_cluster_status(Status, AllNodes, Nodes) ->
@@ -768,7 +778,8 @@ wait_for_cluster_status(N, Max, Status, _AllNodes, Nodes) when N >= Max ->
768778
erlang:error({cluster_status_max_tries_failed,
769779
[{nodes, Nodes},
770780
{expected_status, Status},
771-
{max_tried, Max}]});
781+
{max_tried, Max},
782+
{status, sort_cluster_status(cluster_status(hd(Nodes)))}]});
772783
wait_for_cluster_status(N, Max, Status, AllNodes, Nodes) ->
773784
case lists:all(fun (Node) ->
774785
verify_status_equal(Node, Status, AllNodes)
@@ -781,21 +792,32 @@ wait_for_cluster_status(N, Max, Status, AllNodes, Nodes) ->
781792
verify_status_equal(Node, Status, AllNodes) ->
782793
NodeStatus = sort_cluster_status(cluster_status(Node)),
783794
(AllNodes =/= [Node]) =:= rpc:call(Node, rabbit_db_cluster, is_clustered, [])
784-
andalso NodeStatus =:= Status.
795+
andalso equal(Status, NodeStatus).
796+
797+
equal({_, _, A, B, C}, {undef, undef, A, B, C}) ->
798+
true;
799+
equal({_, _, _, _, _}, {undef, undef, _, _, _}) ->
800+
false;
801+
equal(Status0, Status1) ->
802+
Status0 == Status1.
785803

786804
cluster_status(Node) ->
787-
{rpc:call(Node, rabbit_mnesia, cluster_nodes, [all]),
805+
{rpc:call(Node, rabbit_nodes, list_members, []),
806+
rpc:call(Node, rabbit_nodes, list_running, []),
807+
rpc:call(Node, rabbit_mnesia, cluster_nodes, [all]),
788808
rpc:call(Node, rabbit_mnesia, cluster_nodes, [disc]),
789809
rpc:call(Node, rabbit_mnesia, cluster_nodes, [running])}.
790810

791-
sort_cluster_status({All, Disc, Running}) ->
792-
{lists:sort(All), lists:sort(Disc), lists:sort(Running)}.
811+
sort_cluster_status({{badrpc, {'EXIT', {undef, _}}}, {badrpc, {'EXIT', {undef, _}}}, AllM, DiscM, RunningM}) ->
812+
{undef, undef, lists:sort(AllM), lists:sort(DiscM), lists:sort(RunningM)};
813+
sort_cluster_status({All, Running, AllM, DiscM, RunningM}) ->
814+
{lists:sort(All), lists:sort(Running), lists:sort(AllM), lists:sort(DiscM), lists:sort(RunningM)}.
793815

794816
assert_clustered(Nodes) ->
795-
assert_cluster_status({Nodes, Nodes, Nodes}, Nodes).
817+
assert_cluster_status({Nodes, Nodes, Nodes, Nodes, Nodes}, Nodes).
796818

797819
assert_not_clustered(Node) ->
798-
assert_cluster_status({[Node], [Node], [Node]}, [Node]).
820+
assert_cluster_status({[Node], [Node], [Node], [Node], [Node]}, [Node]).
799821

800822
assert_failure(Fun) ->
801823
case catch Fun() of

0 commit comments

Comments
 (0)