rabbit_maintenance: Replicate rabbit_node_maintenance_states Mnesia table

dumbbell · dumbbell · commit b82ff3752ced · 2023-08-04T17:10:58.000+02:00
[Why]
So far, the code only ensured the table existed. Because it is a
non-local Mnesia table, its presence on a single node was enough. This
is not what we want here: we want the table to be replicated to all
nodes across the cluster.

This was detected while working on the integration of Khepri. In our
work in progress, the Mnesia table was declared differently and
replicated. This caused mixed-version testing to fail because nodes were
hanging forever while trying to force-load that Mnesia table. The hang
was explained by the fact that the node having that single table copy
was stopped or restarted and thus was unavailable, preventing the load
of the table.

[How]
After the table is declared, we use `rabbit_table:ensure_table_copy/3` to
make sure the table is replicated to the local node. Because all nodes
call that boot step, each of them takes care of configuring its copy. In
the end, the table is replicated everywhere.

V2: We also try to add replicas on remote nodes that don't have one yet.
    This reduces the risk of a node waiting forever for the table to
    become available on another node. Failures to add remote replicas
    are ignored: they should not be fatal, nor should they prevent the
    current node from starting.
diff --git a/deps/rabbit/src/rabbit_db_maintenance.erl b/deps/rabbit/src/rabbit_db_maintenance.erl
@@ -24,7 +24,7 @@
 %% setup_schema().
 %% -------------------------------------------------------------------
 
--spec setup_schema() -> ok.
+-spec setup_schema() -> ok | {error, any()}.
 %% @doc Creates the internal schema used by the selected metadata store
 %%
 %% @private
@@ -40,9 +40,41 @@ setup_schema_in_mnesia() ->
       "Creating table ~ts for maintenance mode status",
       [TableName]),
     try
-        _ = rabbit_table:create(
-              TableName,
-              status_table_definition())
+        rabbit_table:create(
+          TableName,
+          status_table_definition()),
+        %% The `rabbit_node_maintenance_states' table used to be global but not
+        %% replicated. This leads to various errors during RabbitMQ boot or
+        %% operations on the Mnesia database. The reason is the table existed
+        %% on a single node and, if that node was stopped or MIA, other nodes
+        %% may wait forever on that node for the table to be available.
+        %%
+        %% The call below makes sure this node has a copy of the table.
+        case rabbit_table:ensure_table_copy(TableName, node(), ram_copies) of
+            ok ->
+                %% Next, we try to fix other nodes in the cluster if they are
+                %% running a version of RabbitMQ which does not replicate the
+                %% table. All nodes must have a replica for Mnesia operations
+                %% to work properly. Therefore the code below is to make older
+                %% compatible with newer nodes.
+                Replicas = mnesia:table_info(TableName, all_nodes),
+                Members = rabbit_nodes:list_running(),
+                MissingOn = Members -- Replicas,
+                lists:foreach(
+                  fun(Node) ->
+                          %% Errors from adding a replica on those older nodes
+                          %% are ignored however. They should not be fatal. The
+                          %% problem will solve by itself once all nodes are
+                          %% upgraded.
+                          _ = rpc:call(
+                                Node,
+                                rabbit_table, ensure_table_copy,
+                                [TableName, Node, ram_copies])
+                  end, MissingOn),
+                ok;
+            Error ->
+                Error
+        end
     catch throw:Reason  ->
             rabbit_log:error(
               "Failed to create maintenance status table: ~tp",