Skip to content

Commit d01059a

Browse files
mkuratczyk authored and
mergify[bot] committed
await quorum+1 improvements
1. If khepri_db is enabled, rabbitmq_metadata is a critical component. 2. When waiting for quorum+1, periodically log what doesn't have quorum+1 - for components: just list them - for queues: list how many we are waiting for and how to display them (because there could be a large number of queues, logging all of them could be impractical or even dangerous). 3. Make the tests significantly faster by using a single group. (cherry picked from commit 6ca2022) (cherry picked from commit 547fc95)
1 parent dce5ecd commit d01059a

File tree

2 files changed

+48
-27
lines changed

2 files changed

+48
-27
lines changed

deps/rabbit/src/rabbit_upgrade_preparation.erl

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
%%
1818

1919
-define(SAMPLING_INTERVAL, 200).
20+
-define(LOGGING_FREQUENCY, ?SAMPLING_INTERVAL * 100).
2021

2122
await_online_quorum_plus_one(Timeout) ->
2223
Iterations = ceil(Timeout / ?SAMPLING_INTERVAL),
@@ -38,7 +39,11 @@ online_members(Component) ->
3839
erlang, whereis, [Component])).
3940

4041
endangered_critical_components() ->
41-
CriticalComponents = [rabbit_stream_coordinator],
42+
CriticalComponents = [rabbit_stream_coordinator] ++
43+
case rabbit_feature_flags:is_enabled(khepri_db) of
44+
true -> [rabbitmq_metadata];
45+
false -> []
46+
end,
4247
Nodes = rabbit_nodes:list_members(),
4348
lists:filter(fun (Component) ->
4449
NumAlive = length(online_members(Component)),
@@ -65,6 +70,21 @@ do_await_safe_online_quorum(IterationsLeft) ->
6570
case EndangeredQueues =:= [] andalso endangered_critical_components() =:= [] of
6671
true -> true;
6772
false ->
73+
case IterationsLeft rem ?LOGGING_FREQUENCY of
74+
0 ->
75+
case length(EndangeredQueues) of
76+
0 -> ok;
77+
N -> rabbit_log:info("Waiting for ~p queues to have quorum+1 members online."
78+
"You can list them with `rabbitmq-diagnostics check_if_node_is_quorum_critical`", [N])
79+
end,
80+
case endangered_critical_components() of
81+
[] -> ok;
82+
_ -> rabbit_log:info("Waiting for the following critical components to have quorum+1 members online: ~p.",
83+
[endangered_critical_components()])
84+
end;
85+
_ ->
86+
ok
87+
end,
6888
timer:sleep(?SAMPLING_INTERVAL),
6989
do_await_safe_online_quorum(IterationsLeft - 1)
7090
end.

deps/rabbit/test/upgrade_preparation_SUITE.erl

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,16 @@
1515

1616
all() ->
1717
[
18-
{group, quorum_queue},
19-
{group, stream}
18+
{group, clustered}
2019
].
2120

2221
groups() ->
2322
[
24-
{quorum_queue, [], [
25-
await_quorum_plus_one_qq
26-
]},
27-
{stream, [], [
28-
await_quorum_plus_one_stream
29-
]},
30-
{stream_coordinator, [], [
31-
await_quorum_plus_one_stream_coordinator
23+
{clustered, [], [
24+
await_quorum_plus_one_qq,
25+
await_quorum_plus_one_stream,
26+
await_quorum_plus_one_stream_coordinator,
27+
await_quorum_plus_one_rabbitmq_metadata
3228
]}
3329
].
3430

@@ -45,21 +41,14 @@ end_per_suite(Config) ->
4541
rabbit_ct_helpers:run_teardown_steps(Config).
4642

4743
init_per_group(Group, Config) ->
48-
case rabbit_ct_helpers:is_mixed_versions() of
49-
true ->
50-
%% in a 3.8/3.9 mixed cluster, ra will not cluster across versions,
51-
%% so quorum plus one will not be achieved
52-
{skip, "not mixed versions compatible"};
53-
_ ->
54-
Config1 = rabbit_ct_helpers:set_config(Config,
55-
[
56-
{rmq_nodes_count, 3},
57-
{rmq_nodename_suffix, Group}
58-
]),
59-
rabbit_ct_helpers:run_steps(Config1,
60-
rabbit_ct_broker_helpers:setup_steps() ++
61-
rabbit_ct_client_helpers:setup_steps())
62-
end.
44+
Config1 = rabbit_ct_helpers:set_config(Config,
45+
[
46+
{rmq_nodes_count, 3},
47+
{rmq_nodename_suffix, Group}
48+
]),
49+
rabbit_ct_helpers:run_steps(Config1,
50+
rabbit_ct_broker_helpers:setup_steps() ++
51+
rabbit_ct_client_helpers:setup_steps()).
6352

6453
end_per_group(_Group, Config) ->
6554
rabbit_ct_helpers:run_steps(Config,
@@ -121,12 +110,24 @@ await_quorum_plus_one_stream_coordinator(Config) ->
121110
%% no queues/streams beyond this point
122111

123112
ok = rabbit_ct_broker_helpers:stop_node(Config, B),
124-
%% this should fail because the corrdinator has only 2 running nodes
113+
%% this should fail because the coordinator has only 2 running nodes
125114
?assertNot(await_quorum_plus_one(Config, 0)),
126115

127116
ok = rabbit_ct_broker_helpers:start_node(Config, B),
128117
?assert(await_quorum_plus_one(Config, 0)).
129118

119+
await_quorum_plus_one_rabbitmq_metadata(Config) ->
120+
Nodes = [A, B, _C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
121+
ok = rabbit_ct_broker_helpers:enable_feature_flag(Config, Nodes, khepri_db),
122+
?assert(await_quorum_plus_one(Config, A)),
123+
124+
ok = rabbit_ct_broker_helpers:stop_node(Config, B),
125+
%% this should fail because rabbitmq_metadata has only 2 running nodes
126+
?assertNot(await_quorum_plus_one(Config, A)),
127+
128+
ok = rabbit_ct_broker_helpers:start_node(Config, B),
129+
?assert(await_quorum_plus_one(Config, A)).
130+
130131
%%
131132
%% Implementation
132133
%%

0 commit comments

Comments
 (0)