nodeup

dumbbell · dumbbell · commit 854160bfde67 · 2025-09-23T15:51:02.000+02:00
diff --git a/deps/rabbit/src/rabbit_amqqueue.erl b/deps/rabbit/src/rabbit_amqqueue.erl
@@ -1944,8 +1944,16 @@ is_dead_exclusive(Q) when ?amqqueue_exclusive_owner_is_pid(Q) ->
 
 -spec on_node_up(node()) -> 'ok'.
 
-on_node_up(_Node) ->
-    ok.
+on_node_up(Node) ->
+    case rabbit_khepri:is_enabled() of
+        true ->
+            %% With Khepri, we try to delete transient queues now because
+            %% earlier updates may have timed out for lack of a quorum while
+            %% `Node' was down.
+            ok = delete_transient_queues_on_node(Node);
+        false ->
+            ok
+    end.
 
 -spec on_node_down(node()) -> 'ok'.
 
diff --git a/deps/rabbit/src/rabbit_node_monitor.erl b/deps/rabbit/src/rabbit_node_monitor.erl
@@ -430,16 +430,8 @@ handle_call(status, _From, State = #state{partitions = Partitions}) ->
 handle_call(_Request, _From, State) ->
     {noreply, State}.
 
-handle_cast(notify_node_up, State = #state{guid = GUID}) ->
-    Nodes = rabbit_nodes:list_reachable() -- [node()],
-    gen_server:abcast(Nodes, ?SERVER,
-                      {node_up, node(), rabbit_db_cluster:node_type(), GUID}),
-    %% register other active rabbits with this rabbit
-    DiskNodes = rabbit_db_cluster:disc_members(),
-    [gen_server:cast(?SERVER, {node_up, N, case lists:member(N, DiskNodes) of
-                                               true  -> disc;
-                                               false -> ram
-                                           end}) || N <- Nodes],
+handle_cast(notify_node_up, State) ->
+    do_notify_node_up(State),
     {noreply, State};
 
 %%----------------------------------------------------------------------------
@@ -665,6 +657,12 @@ handle_info({nodedown, Node, Info}, State) ->
 
 handle_info({nodeup, Node, _Info}, State) ->
     ?LOG_INFO("node ~tp up", [Node]),
+    %% We notify that `rabbit' is up here too (in addition to the message
+    %% sent explicitly by a boot step). That's because nodes may go down then
+    %% up during a network partition, and with Khepri, nodes are not restarted
+    %% (unlike with some partition handling strategies used with Mnesia), and
+    %% thus the boot steps are not executed.
+    do_notify_node_up(State),
     {noreply, State};
 
 handle_info({mnesia_system_event,
@@ -854,6 +852,20 @@ wait_for_cluster_recovery(Condition) ->
                  wait_for_cluster_recovery(Condition)
     end.
 
+do_notify_node_up(#state{guid = GUID}) ->
+    Nodes = rabbit_nodes:list_reachable() -- [node()],
+    gen_server:abcast(Nodes, ?SERVER,
+                      {node_up, node(), rabbit_db_cluster:node_type(), GUID}),
+    %% register other active rabbits with this rabbit
+    DiskNodes = rabbit_db_cluster:disc_members(),
+    _ = [gen_server:cast(
+           ?SERVER,
+           {node_up, N, case lists:member(N, DiskNodes) of
+                            true  -> disc;
+                            false -> ram
+                        end}) || N <- Nodes],
+    ok.
+
 handle_dead_rabbit(Node, State) ->
     %% TODO: This may turn out to be a performance hog when there are
     %% lots of nodes.  We really only need to execute some of these