Skip to content

Commit b819638

Browse files
Merge pull request #12113 from rabbitmq/await-quorum-logging
rabbitmq-upgrade await_quorum_plus_one improvements
2 parents 96fc028 + 6b444ae commit b819638

File tree

4 files changed

+69
-39
lines changed

4 files changed

+69
-39
lines changed

deps/rabbit/src/rabbit_upgrade_preparation.erl

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
%%
1616

1717
-define(SAMPLING_INTERVAL, 200).
18+
-define(LOGGING_FREQUENCY, ?SAMPLING_INTERVAL * 100).
1819

1920
await_online_quorum_plus_one(Timeout) ->
2021
Iterations = ceil(Timeout / ?SAMPLING_INTERVAL),
@@ -30,7 +31,11 @@ online_members(Component) ->
3031
erlang, whereis, [Component])).
3132

3233
endangered_critical_components() ->
33-
CriticalComponents = [rabbit_stream_coordinator],
34+
CriticalComponents = [rabbit_stream_coordinator] ++
35+
case rabbit_feature_flags:is_enabled(khepri_db) of
36+
true -> [rabbitmq_metadata];
37+
false -> []
38+
end,
3439
Nodes = rabbit_nodes:list_members(),
3540
lists:filter(fun (Component) ->
3641
NumAlive = length(online_members(Component)),
@@ -57,6 +62,21 @@ do_await_safe_online_quorum(IterationsLeft) ->
5762
case EndangeredQueues =:= [] andalso endangered_critical_components() =:= [] of
5863
true -> true;
5964
false ->
65+
case IterationsLeft rem ?LOGGING_FREQUENCY of
66+
0 ->
67+
case length(EndangeredQueues) of
68+
0 -> ok;
69+
N -> rabbit_log:info("Waiting for ~p queues and streams to have quorum+1 replicas online. "
70+
"You can list them with `rabbitmq-diagnostics check_if_node_is_quorum_critical`", [N])
71+
end,
72+
case endangered_critical_components() of
73+
[] -> ok;
74+
_ -> rabbit_log:info("Waiting for the following critical components to have quorum+1 replicas online: ~p.",
75+
[endangered_critical_components()])
76+
end;
77+
_ ->
78+
ok
79+
end,
6080
timer:sleep(?SAMPLING_INTERVAL),
6181
do_await_safe_online_quorum(IterationsLeft - 1)
6282
end.
@@ -70,6 +90,6 @@ list_with_minimum_quorum_for_cli() ->
7090
[#{
7191
<<"readable_name">> => C,
7292
<<"name">> => C,
73-
<<"virtual_host">> => "-",
93+
<<"virtual_host">> => <<"(not applicable)">>,
7494
<<"type">> => process
7595
} || C <- endangered_critical_components()].

deps/rabbit/test/upgrade_preparation_SUITE.erl

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,16 @@
1414

1515
all() ->
1616
[
17-
{group, quorum_queue},
18-
{group, stream}
17+
{group, clustered}
1918
].
2019

2120
groups() ->
2221
[
23-
{quorum_queue, [], [
24-
await_quorum_plus_one_qq
25-
]},
26-
{stream, [], [
27-
await_quorum_plus_one_stream
28-
]},
29-
{stream_coordinator, [], [
30-
await_quorum_plus_one_stream_coordinator
22+
{clustered, [], [
23+
await_quorum_plus_one_qq,
24+
await_quorum_plus_one_stream,
25+
await_quorum_plus_one_stream_coordinator,
26+
await_quorum_plus_one_rabbitmq_metadata
3127
]}
3228
].
3329

@@ -44,31 +40,30 @@ end_per_suite(Config) ->
4440
rabbit_ct_helpers:run_teardown_steps(Config).
4541

4642
init_per_group(Group, Config) ->
47-
case rabbit_ct_helpers:is_mixed_versions() of
48-
true ->
49-
%% in a 3.8/3.9 mixed cluster, ra will not cluster across versions,
50-
%% so quorum plus one will not be achieved
51-
{skip, "not mixed versions compatible"};
52-
_ ->
53-
Config1 = rabbit_ct_helpers:set_config(Config,
54-
[
55-
{rmq_nodes_count, 3},
56-
{rmq_nodename_suffix, Group}
57-
]),
58-
rabbit_ct_helpers:run_steps(Config1,
59-
rabbit_ct_broker_helpers:setup_steps() ++
60-
rabbit_ct_client_helpers:setup_steps())
61-
end.
43+
Config1 = rabbit_ct_helpers:set_config(Config,
44+
[
45+
{rmq_nodes_count, 3},
46+
{rmq_nodename_suffix, Group}
47+
]),
48+
rabbit_ct_helpers:run_steps(Config1,
49+
rabbit_ct_broker_helpers:setup_steps() ++
50+
rabbit_ct_client_helpers:setup_steps()).
6251

6352
end_per_group(_Group, Config) ->
6453
rabbit_ct_helpers:run_steps(Config,
6554
rabbit_ct_client_helpers:teardown_steps() ++
6655
rabbit_ct_broker_helpers:teardown_steps()).
6756

6857

69-
init_per_testcase(TestCase, Config) ->
70-
rabbit_ct_helpers:testcase_started(Config, TestCase),
71-
Config.
58+
init_per_testcase(Testcase, Config) when Testcase == await_quorum_plus_one_rabbitmq_metadata ->
59+
case rabbit_ct_helpers:is_mixed_versions() of
60+
true ->
61+
{skip, "not mixed versions compatible"};
62+
_ ->
63+
rabbit_ct_helpers:testcase_started(Config, Testcase)
64+
end;
65+
init_per_testcase(Testcase, Config) ->
66+
rabbit_ct_helpers:testcase_started(Config, Testcase).
7267

7368
end_per_testcase(TestCase, Config) ->
7469
rabbit_ct_helpers:testcase_finished(Config, TestCase).
@@ -120,12 +115,24 @@ await_quorum_plus_one_stream_coordinator(Config) ->
120115
%% no queues/streams beyond this point
121116

122117
ok = rabbit_ct_broker_helpers:stop_node(Config, B),
123-
%% this should fail because the corrdinator has only 2 running nodes
118+
%% this should fail because the coordinator has only 2 running nodes
124119
?assertNot(await_quorum_plus_one(Config, 0)),
125120

126121
ok = rabbit_ct_broker_helpers:start_node(Config, B),
127122
?assert(await_quorum_plus_one(Config, 0)).
128123

124+
await_quorum_plus_one_rabbitmq_metadata(Config) ->
125+
Nodes = [A, B, _C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
126+
ok = rabbit_ct_broker_helpers:enable_feature_flag(Config, Nodes, khepri_db),
127+
?assert(await_quorum_plus_one(Config, A)),
128+
129+
ok = rabbit_ct_broker_helpers:stop_node(Config, B),
130+
%% this should fail because rabbitmq_metadata has only 2 running nodes
131+
?assertNot(await_quorum_plus_one(Config, A)),
132+
133+
ok = rabbit_ct_broker_helpers:start_node(Config, B),
134+
?assert(await_quorum_plus_one(Config, A)).
135+
129136
%%
130137
%% Implementation
131138
%%

deps/rabbitmq_cli/lib/rabbitmq/cli/streams/commands/add_replica_command.ex

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ defmodule RabbitMQ.CLI.Streams.Commands.AddReplicaCommand do
2525
to_atom(node)
2626
]) do
2727
{:error, :classic_queue_not_supported} ->
28-
{:error, "Cannot add replicas to a classic queue"}
28+
{:error, "Cannot add replicas to classic queues"}
2929

3030
{:error, :quorum_queue_not_supported} ->
31-
{:error, "Cannot add replicas to a quorum queue"}
31+
{:error, "Cannot add replicas to quorum queues"}
3232

3333
other ->
3434
other
@@ -37,11 +37,11 @@ defmodule RabbitMQ.CLI.Streams.Commands.AddReplicaCommand do
3737

3838
use RabbitMQ.CLI.DefaultOutput
3939

40-
def usage, do: "add_replica [--vhost <vhost>] <queue> <node>"
40+
def usage, do: "add_replica [--vhost <vhost>] <stream> <node>"
4141

4242
def usage_additional do
4343
[
44-
["<queue>", "stream queue name"],
44+
["<stream>", "stream name"],
4545
["<node>", "node to add a new replica on"]
4646
]
4747
end
@@ -54,11 +54,11 @@ defmodule RabbitMQ.CLI.Streams.Commands.AddReplicaCommand do
5454

5555
def help_section, do: :replication
5656

57-
def description, do: "Adds a stream queue replica on the given node."
57+
def description, do: "Adds a stream replica on the given node"
5858

5959
def banner([name, node], _) do
6060
[
61-
"Adding a replica for queue #{name} on node #{node}..."
61+
"Adding a replica for stream #{name} on node #{node}..."
6262
]
6363
end
6464
end

deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,11 @@ is_quorum_critical_test(Config) ->
198198
Body = http_get_failed(Config, "/health/checks/node-is-quorum-critical"),
199199
?assertEqual(<<"failed">>, maps:get(<<"status">>, Body)),
200200
?assertEqual(true, maps:is_key(<<"reason">>, Body)),
201-
[Queue] = maps:get(<<"queues">>, Body),
202-
?assertEqual(QName, maps:get(<<"name">>, Queue)),
201+
Queues = maps:get(<<"queues">>, Body),
202+
?assert(lists:any(
203+
fun(Item) ->
204+
QName =:= maps:get(<<"name">>, Item)
205+
end, Queues)),
203206

204207
passed.
205208

0 commit comments

Comments
 (0)