diff --git a/deps/rabbit/src/rabbit_db_cluster.erl b/deps/rabbit/src/rabbit_db_cluster.erl index 205f970bcbf0..a11ba80af42e 100644 --- a/deps/rabbit/src/rabbit_db_cluster.erl +++ b/deps/rabbit/src/rabbit_db_cluster.erl @@ -50,7 +50,7 @@ ensure_feature_flags_are_in_sync(Nodes, NodeIsVirgin) -> RemoteNode :: node(), Ret :: Ok | Error, Ok :: {ok, [node()]} | {ok, already_member}, - Error :: {error, {inconsistent_cluster, string()}}. + Error :: {error, {inconsistent_cluster, string()}} | {error, {erpc, noconnection}}. can_join(RemoteNode) -> ?LOG_INFO( @@ -82,7 +82,7 @@ can_join_using_khepri(RemoteNode) -> NodeType :: node_type(), Ret :: Ok | Error, Ok :: ok | {ok, already_member}, - Error :: {error, {inconsistent_cluster, string()}}. + Error :: {error, {inconsistent_cluster, string()}} | {error, {erpc, noconnection}}. %% @doc Adds this node to a cluster using `RemoteNode' to reach it. join(ThisNode, _NodeType) when ThisNode =:= node() -> @@ -214,6 +214,22 @@ join(RemoteNode, NodeType) end; {ok, already_member} -> {ok, already_member}; + {error, {inconsistent_cluster, _Msg}} = Error -> + case rabbit_khepri:is_enabled() of + true -> + Error; + false -> + %% rabbit_mnesia:can_join_cluster/1 notices inconsistent_cluster, + %% as RemoteNode thinks this node is already in the cluster. + %% Attempt to leave the RemoteNode cluster, the discovery cluster, + %% and simply retry the operation. + rabbit_log:info("Mnesia: node ~tp thinks it's clustered " + "with node ~tp, but ~tp disagrees. ~tp will ask " + "to leave the cluster and try again.", + [RemoteNode, node(), node(), node()]), + ok = rabbit_mnesia:leave_then_rediscover_cluster(RemoteNode), + join(RemoteNode, NodeType) + end; {error, _} = Error -> Error end.
diff --git a/deps/rabbit/src/rabbit_mnesia.erl b/deps/rabbit/src/rabbit_mnesia.erl index d7b010c1502a..89ef6e726b91 100644 --- a/deps/rabbit/src/rabbit_mnesia.erl +++ b/deps/rabbit/src/rabbit_mnesia.erl @@ -73,7 +73,7 @@ -export([node_info/0, remove_node_if_mnesia_running/1]). %% Used internally in `rabbit_db_cluster'. --export([members/0]). +-export([members/0, leave_then_rediscover_cluster/1]). %% Used internally in `rabbit_khepri'. -export([mnesia_and_msg_store_files/0]). @@ -155,7 +155,7 @@ init() -> %% we cluster to its cluster. -spec can_join_cluster(node()) - -> {ok, [node()]} | {ok, already_member} | {error, {inconsistent_cluster, string()}}. + -> {ok, [node()]} | {ok, already_member} | {error, {inconsistent_cluster, string()}} | {error, {erpc, noconnection}}. can_join_cluster(DiscoveryNode) -> ensure_mnesia_dir(), @@ -179,7 +179,6 @@ can_join_cluster(DiscoveryNode) -> {ok, already_member}; false -> Msg = format_inconsistent_cluster_message(DiscoveryNode, node()), - rabbit_log:error(Msg), {error, {inconsistent_cluster, Msg}} end end. @@ -923,15 +922,19 @@ remove_node_if_mnesia_running(Node) -> end end. -leave_cluster() -> - case rabbit_nodes:nodes_excl_me(cluster_nodes(all)) of - [] -> ok; - AllNodes -> case lists:any(fun leave_cluster/1, AllNodes) of - true -> ok; - false -> e(no_running_cluster_nodes) - end - end. +leave_then_rediscover_cluster(DiscoveryNode) -> + {ClusterNodes, _, _} = discover_cluster([DiscoveryNode]), + leave_cluster(rabbit_nodes:nodes_excl_me(ClusterNodes)). +leave_cluster() -> + leave_cluster(rabbit_nodes:nodes_excl_me(cluster_nodes(all))). +leave_cluster([]) -> + ok; +leave_cluster(Nodes) when is_list(Nodes) -> + case lists:any(fun leave_cluster/1, Nodes) of + true -> ok; + false -> e(no_running_cluster_nodes) + end; leave_cluster(Node) -> case rpc:call(Node, rabbit_mnesia, remove_node_if_mnesia_running, [node()]) of