Skip to content

Commit 1e44604

Browse files
committed
QQ Reconciliator - add comments to SUITE, add new group for explicit triggers tests
1 parent d47d7f9 commit 1e44604

File tree

1 file changed

+80
-29
lines changed

1 file changed

+80
-29
lines changed

deps/rabbit/test/quorum_queue_member_reconciliation_SUITE.erl

Lines changed: 80 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12,39 +12,69 @@
1212
-include_lib("amqp_client/include/amqp_client.hrl").
1313
-compile([nowarn_export_all, export_all]).
1414

15+
%% The reconciler has two modes of triggering itself
16+
%% - timer based
17+
%% - event based
18+
%% The default config of this test has a very short Interval - 5 seconds - which is lower than
19+
%% wait_until timeout. Meaning that even if all domain triggers (node_up/down, policy_set, etc)
20+
%% are disconnected, tests would still be green.
21+
%% So to test triggers it is essential to set Interval high enough (the very default value of 60 minutes is perfect)
22+
%%
23+
%% TODO: test `policy_set` trigger
1524

1625
all() ->
1726
[
18-
{group, unclustered}
27+
{group, unclustered},
28+
{group, unclustered_triggers}
1929
].
2030

2131
groups() ->
2232
[
23-
{unclustered, [],
33+
{unclustered, [], %% low interval, even if triggers do not work all tests should pass
2434
[
2535
{quorum_queue_3, [], [auto_grow, auto_grow_drained_node, auto_shrink]}
36+
]},
37+
{unclustered_triggers, [], %% large interval (larger than `wait_until`(30sec))
38+
[ %% could pass only if triggers work, see also `auto_grow_drained_node`
39+
{quorum_queue_3, [], [auto_grow, auto_shrink]}
2640
]}
2741
].
2842

2943
%% -------------------------------------------------------------------
3044
%% Testsuite setup/teardown.
3145
%% -------------------------------------------------------------------
3246

33-
init_per_suite(Config0) ->
47+
init_per_suite(Config) ->
3448
rabbit_ct_helpers:log_environment(),
49+
rabbit_ct_helpers:run_setup_steps(Config, []).
50+
51+
end_per_suite(Config) ->
52+
rabbit_ct_helpers:run_teardown_steps(Config).
53+
54+
init_per_group(unclustered, Config0) ->
3555
Config1 = rabbit_ct_helpers:merge_app_env(
3656
Config0, {rabbit, [{quorum_tick_interval, 1000},
3757
{quorum_membership_reconciliation_enabled, true},
3858
{quorum_membership_reconciliation_auto_remove, true},
3959
{quorum_membership_reconciliation_interval, 5000},
4060
{quorum_membership_reconciliation_trigger_interval, 2000},
4161
{quorum_membership_reconciliation_target_group_size, 3}]}),
42-
rabbit_ct_helpers:run_setup_steps(Config1, []).
43-
44-
end_per_suite(Config) ->
45-
rabbit_ct_helpers:run_teardown_steps(Config).
46-
init_per_group(unclustered, Config) ->
47-
rabbit_ct_helpers:set_config(Config, [{rmq_nodes_clustered, false}]);
62+
rabbit_ct_helpers:set_config(Config1, [{rmq_nodes_clustered, false}]);
63+
init_per_group(unclustered_triggers, Config0) ->
64+
Config1 = rabbit_ct_helpers:merge_app_env(
65+
Config0, {rabbit, [{quorum_tick_interval, 1000},
66+
{quorum_membership_reconciliation_enabled, true},
67+
{quorum_membership_reconciliation_auto_remove, true},
68+
{quorum_membership_reconciliation_interval, 50000},
69+
{quorum_membership_reconciliation_trigger_interval, 2000},
70+
{quorum_membership_reconciliation_target_group_size, 3}]}),
71+
%% shrink timeout is set here because otherwise, when a node is stopped right after the queue is created,
72+
%% the test will be green without triggers because cluster change will likely fall within trigger_interval
73+
%% which will be set as a new timer value by queue_created trigger.
74+
%% See also `auto_shrink/1` comment
75+
rabbit_ct_helpers:set_config(Config1, [{rmq_nodes_clustered, false},
76+
{quorum_membership_reconciliation_interval, 50000},
77+
{shrink_timeout, 2000}]);
4878
init_per_group(Group, Config) ->
4979
ClusterSize = 3,
5080
Config1 = rabbit_ct_helpers:set_config(Config,
@@ -57,6 +87,8 @@ init_per_group(Group, Config) ->
5787

5888
end_per_group(unclustered, Config) ->
5989
Config;
90+
end_per_group(unclustered_triggers, Config) ->
91+
Config;
6092
end_per_group(_, Config) ->
6193
rabbit_ct_helpers:run_steps(Config,
6294
rabbit_ct_broker_helpers:teardown_steps()).
@@ -72,34 +104,17 @@ init_per_testcase(Testcase, Config) ->
72104
]),
73105
rabbit_ct_helpers:run_steps(Config2, rabbit_ct_client_helpers:setup_steps()).
74106

75-
merge_app_env(Config) ->
76-
rabbit_ct_helpers:merge_app_env(
77-
rabbit_ct_helpers:merge_app_env(Config,
78-
{rabbit, [{core_metrics_gc_interval, 100}]}),
79-
{ra, [{min_wal_roll_over_interval, 30000}]}).
80-
81107
end_per_testcase(Testcase, Config) ->
82108
[Server0, Server1, Server2] =
83109
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
110+
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
111+
amqp_channel:call(Ch, #'queue.delete'{queue = rabbit_data_coercion:to_binary(Testcase)}),
84112
reset_nodes([Server2, Server0], Server1),
85113
Config1 = rabbit_ct_helpers:run_steps(
86114
Config,
87115
rabbit_ct_client_helpers:teardown_steps()),
88116
rabbit_ct_helpers:testcase_finished(Config1, Testcase).
89117

90-
reset_nodes([], _Leader) ->
91-
ok;
92-
reset_nodes([Node| Nodes], Leader) ->
93-
ok = rabbit_control_helper:command(stop_app, Node),
94-
case rabbit_control_helper:command(forget_cluster_node, Leader, [atom_to_list(Node)]) of
95-
ok -> ok;
96-
{error, _, <<"Error:\n{:not_a_cluster_node, ~c\"The node selected is not in the cluster.\"}">>} -> ok
97-
end,
98-
ok = rabbit_control_helper:command(reset, Node),
99-
ok = rabbit_control_helper:command(start_app, Node),
100-
reset_nodes(Nodes, Leader).
101-
102-
103118
%% -------------------------------------------------------------------
104119
%% Testcases.
105120
%% -------------------------------------------------------------------
@@ -134,6 +149,10 @@ auto_grow(Config) ->
134149
end).
135150

136151
auto_grow_drained_node(Config) ->
152+
%% NOTE: with large Interval (larger than wait_until) test will fail.
153+
%% the reason is that entering/exiting drain state does not emit events
154+
%% and even if they did via gen_event, they would only be local to that node.
155+
%% so the reconciliator has no choice but to wait the full Interval
137156
[Server0, Server1, Server2] =
138157
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
139158
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
@@ -169,7 +188,6 @@ auto_grow_drained_node(Config) ->
169188
3 =:= length(M)
170189
end).
171190

172-
173191
auto_shrink(Config) ->
174192
[Server0, Server1, Server2] =
175193
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
@@ -186,6 +204,19 @@ auto_shrink(Config) ->
186204
Server1}),
187205
3 =:= length(M)
188206
end),
207+
208+
%% The logic of reconciliator is interesting - when it is triggered it actually postpones
209+
%% any action until trigger_interval has elapsed.
210+
%% So if this test wants to test that reconciliator reacts to node_down or similar
211+
%% it has to wait at least trigger_interval before removing node. Otherwise
212+
%% the shrink effect would come from the previous trigger (which in our case is queue declaration)
213+
%%
214+
%% The key here is that when `queue_created` trigger switches timer to trigger_interval the queue has 3 nodes
215+
%% and at least locally stop_app works fast enough so that when trigger_interval has elapsed, the number of Members
216+
%% will be changed without any need for node_down.
217+
218+
timer:sleep(rabbit_ct_helpers:get_config(Config, shrink_timeout, 0)),
219+
189220
ok = rabbit_control_helper:command(stop_app, Server2),
190221
ok = rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_db_cluster, forget_member,
191222
[Server2, false]),
@@ -196,7 +227,27 @@ auto_shrink(Config) ->
196227
2 =:= length(M)
197228
end).
198229

230+
%% -------------------------------------------------------------------
231+
%% Helpers.
232+
%% -------------------------------------------------------------------
199233

234+
merge_app_env(Config) ->
235+
rabbit_ct_helpers:merge_app_env(
236+
rabbit_ct_helpers:merge_app_env(Config,
237+
{rabbit, [{core_metrics_gc_interval, 100}]}),
238+
{ra, [{min_wal_roll_over_interval, 30000}]}).
239+
240+
reset_nodes([], _Leader) ->
241+
ok;
242+
reset_nodes([Node| Nodes], Leader) ->
243+
ok = rabbit_control_helper:command(stop_app, Node),
244+
case rabbit_control_helper:command(forget_cluster_node, Leader, [atom_to_list(Node)]) of
245+
ok -> ok;
246+
{error, _, <<"Error:\n{:not_a_cluster_node, ~c\"The node selected is not in the cluster.\"}">>} -> ok
247+
end,
248+
ok = rabbit_control_helper:command(reset, Node),
249+
ok = rabbit_control_helper:command(start_app, Node),
250+
reset_nodes(Nodes, Leader).
200251

201252
add_server_to_cluster(Server, Leader) ->
202253
ok = rabbit_control_helper:command(stop_app, Server),

0 commit comments

Comments
 (0)