Skip to content

Commit d489fc9

Browse files
authored
Merge pull request #9005 from rabbitmq/replicate-rabbit_node_maintenance_states-table
rabbit_maintenance: Replicate `rabbit_node_maintenance_states` Mnesia table
2 parents ca6f270 + b82ff37 commit d489fc9

File tree

1 file changed

+36
-4
lines changed

1 file changed

+36
-4
lines changed

deps/rabbit/src/rabbit_db_maintenance.erl

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
%% setup_schema().
2525
%% -------------------------------------------------------------------
2626

27-
-spec setup_schema() -> ok.
27+
-spec setup_schema() -> ok | {error, any()}.
2828
%% @doc Creates the internal schema used by the selected metadata store
2929
%%
3030
%% @private
@@ -40,9 +40,41 @@ setup_schema_in_mnesia() ->
4040
"Creating table ~ts for maintenance mode status",
4141
[TableName]),
4242
try
43-
_ = rabbit_table:create(
44-
TableName,
45-
status_table_definition())
43+
rabbit_table:create(
44+
TableName,
45+
status_table_definition()),
46+
%% The `rabbit_node_maintenance_states' table used to be global but not
47+
%% replicated. This leads to various errors during RabbitMQ boot or
48+
%% operations on the Mnesia database. The reason is the table existed
49+
%% on a single node and, if that node was stopped or MIA, other nodes
50+
%% may wait forever on that node for the table to be available.
51+
%%
52+
%% The call below makes sure this node has a copy of the table.
53+
case rabbit_table:ensure_table_copy(TableName, node(), ram_copies) of
54+
ok ->
55+
%% Next, we try to fix other nodes in the cluster if they are
56+
%% running a version of RabbitMQ which does not replicate the
57+
%% table. All nodes must have a replica for Mnesia operations
58+
%% to work properly. Therefore the code below is to make older nodes
59+
%% compatible with newer nodes.
60+
Replicas = mnesia:table_info(TableName, all_nodes),
61+
Members = rabbit_nodes:list_running(),
62+
MissingOn = Members -- Replicas,
63+
lists:foreach(
64+
fun(Node) ->
65+
%% Errors from adding a replica on those older nodes
66+
%% are ignored however. They should not be fatal. The
67+
%% problem will resolve itself once all nodes are
68+
%% upgraded.
69+
_ = rpc:call(
70+
Node,
71+
rabbit_table, ensure_table_copy,
72+
[TableName, Node, ram_copies])
73+
end, MissingOn),
74+
ok;
75+
Error ->
76+
Error
77+
end
4678
catch throw:Reason ->
4779
rabbit_log:error(
4880
"Failed to create maintenance status table: ~tp",

0 commit comments

Comments
 (0)