Skip to content

Commit b79408d

Browse files
committed
QQ Reconciliator - add comments to SUITE, add new group for explicit triggers tests
1 parent f53c623 commit b79408d

File tree

1 file changed

+82
-29
lines changed

1 file changed

+82
-29
lines changed

deps/rabbit/test/quorum_queue_member_reconciliation_SUITE.erl

Lines changed: 82 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12,39 +12,69 @@
1212
-include_lib("amqp_client/include/amqp_client.hrl").
1313
-compile([nowarn_export_all, export_all]).
1414

15+
%% The reconciler has two modes of triggering itself
16+
%% - timer based
17+
%% - event based
18+
%% The default config of this test has a very short Interval - 5 seconds - which is lower than the
19+
%% wait_until timeout. This means that even if all domain triggers (node_up/down, policy_set, etc)
20+
%% are disconnected, the tests would still be green.
21+
%% So to test the triggers it is essential to set Interval high enough (the default value of 60 minutes is perfect)
22+
%%
23+
%% TODO: test `policy_set` trigger
1524

1625
all() ->
1726
[
18-
{group, unclustered}
27+
{group, unclustered},
28+
{group, unclustered_triggers}
1929
].
2030

2131
groups() ->
2232
[
23-
{unclustered, [],
33+
{unclustered, [], %% low interval, even if triggers do not work all tests should pass
2434
[
2535
{quorum_queue_3, [], [auto_grow, auto_grow_drained_node, auto_shrink]}
36+
]},
37+
{unclustered_triggers, [], %% large interval (larger than `wait_until`(30sec))
38+
[ %% can pass only if triggers work; see also `auto_grow_drained_node`
39+
{quorum_queue_3, [], [auto_grow, auto_shrink]}
2640
]}
2741
].
2842

2943
%% -------------------------------------------------------------------
3044
%% Testsuite setup/teardown.
3145
%% -------------------------------------------------------------------
3246

33-
init_per_suite(Config0) ->
47+
init_per_suite(Config) ->
3448
rabbit_ct_helpers:log_environment(),
49+
rabbit_ct_helpers:run_setup_steps(Config, []).
50+
51+
end_per_suite(Config) ->
52+
rabbit_ct_helpers:run_teardown_steps(Config).
53+
54+
init_per_group(unclustered, Config0) ->
3555
Config1 = rabbit_ct_helpers:merge_app_env(
3656
Config0, {rabbit, [{quorum_tick_interval, 1000},
3757
{quorum_membership_reconciliation_enabled, true},
3858
{quorum_membership_reconciliation_auto_remove, true},
3959
{quorum_membership_reconciliation_interval, 5000},
4060
{quorum_membership_reconciliation_trigger_interval, 2000},
4161
{quorum_membership_reconciliation_target_group_size, 3}]}),
42-
rabbit_ct_helpers:run_setup_steps(Config1, []).
43-
44-
end_per_suite(Config) ->
45-
rabbit_ct_helpers:run_teardown_steps(Config).
46-
init_per_group(unclustered, Config) ->
47-
rabbit_ct_helpers:set_config(Config, [{rmq_nodes_clustered, false}]);
62+
rabbit_ct_helpers:set_config(Config1, [{rmq_nodes_clustered, false}]);
63+
init_per_group(unclustered_triggers, Config0) ->
64+
Config1 = rabbit_ct_helpers:merge_app_env(
65+
Config0, {rabbit, [{quorum_tick_interval, 1000},
66+
{quorum_membership_reconciliation_enabled, true},
67+
{quorum_membership_reconciliation_auto_remove, true},
68+
{quorum_membership_reconciliation_interval, 50000},
69+
{quorum_membership_reconciliation_trigger_interval, 2000},
70+
{quorum_membership_reconciliation_target_group_size, 3}]}),
71+
%% shrink timeout is set here because otherwise, when a node is stopped right after the queue is created,
72+
%% it will be green without triggers because it will likely fall within trigger_interval
73+
%% which will be set as a new timer by new queue trigger.
74+
%% See also `auto_shrink/1` comment
75+
rabbit_ct_helpers:set_config(Config1, [{rmq_nodes_clustered, false},
76+
{quorum_membership_reconciliation_interval, 50000},
77+
{shrink_timeout, 2000}]);
4878
init_per_group(Group, Config) ->
4979
ClusterSize = 3,
5080
Config1 = rabbit_ct_helpers:set_config(Config,
@@ -56,6 +86,10 @@ init_per_group(Group, Config) ->
5686
rabbit_ct_broker_helpers:setup_steps()).
5787

5888
end_per_group(unclustered, Config) ->
89+
timer:sleep(5000),
90+
%%
91+
Config;
92+
end_per_group(unclustered_triggers, Config) ->
5993
Config;
6094
end_per_group(_, Config) ->
6195
rabbit_ct_helpers:run_steps(Config,
@@ -72,34 +106,17 @@ init_per_testcase(Testcase, Config) ->
72106
]),
73107
rabbit_ct_helpers:run_steps(Config2, rabbit_ct_client_helpers:setup_steps()).
74108

75-
merge_app_env(Config) ->
76-
rabbit_ct_helpers:merge_app_env(
77-
rabbit_ct_helpers:merge_app_env(Config,
78-
{rabbit, [{core_metrics_gc_interval, 100}]}),
79-
{ra, [{min_wal_roll_over_interval, 30000}]}).
80-
81109
end_per_testcase(Testcase, Config) ->
82110
[Server0, Server1, Server2] =
83111
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
112+
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
113+
amqp_channel:call(Ch, #'queue.delete'{queue = rabbit_data_coercion:to_binary(Testcase)}),
84114
reset_nodes([Server2, Server0], Server1),
85115
Config1 = rabbit_ct_helpers:run_steps(
86116
Config,
87117
rabbit_ct_client_helpers:teardown_steps()),
88118
rabbit_ct_helpers:testcase_finished(Config1, Testcase).
89119

90-
reset_nodes([], _Leader) ->
91-
ok;
92-
reset_nodes([Node| Nodes], Leader) ->
93-
ok = rabbit_control_helper:command(stop_app, Node),
94-
case rabbit_control_helper:command(forget_cluster_node, Leader, [atom_to_list(Node)]) of
95-
ok -> ok;
96-
{error, _, <<"Error:\n{:not_a_cluster_node, ~c\"The node selected is not in the cluster.\"}">>} -> ok
97-
end,
98-
ok = rabbit_control_helper:command(reset, Node),
99-
ok = rabbit_control_helper:command(start_app, Node),
100-
reset_nodes(Nodes, Leader).
101-
102-
103120
%% -------------------------------------------------------------------
104121
%% Testcases.
105122
%% -------------------------------------------------------------------
@@ -134,6 +151,10 @@ auto_grow(Config) ->
134151
end).
135152

136153
auto_grow_drained_node(Config) ->
154+
%% NOTE: with a large Interval (larger than wait_until) this test will fail.
155+
%% the reason is that entering/exiting drain state does not emit events
156+
%% and even if they did via gen_event, they are going to be local to that node only.
157+
%% so the reconciliator has no choice but to wait the full Interval
137158
[Server0, Server1, Server2] =
138159
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
139160
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
@@ -169,7 +190,6 @@ auto_grow_drained_node(Config) ->
169190
3 =:= length(M)
170191
end).
171192

172-
173193
auto_shrink(Config) ->
174194
[Server0, Server1, Server2] =
175195
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
@@ -186,6 +206,19 @@ auto_shrink(Config) ->
186206
Server1}),
187207
3 =:= length(M)
188208
end),
209+
210+
%% The logic of reconciliator is interesting - when it is triggered it actually postpones
211+
%% any action till trigger_interval
212+
%% So if this test wants to test that reconciliator reacts to node_down or similar
213+
%% it has to wait at least trigger_interval before removing node because otherwise
214+
%% the shrink effect would come from the previous trigger (which in our case is queue declaration)
215+
%%
216+
%% the key here is that when `queue_created` switches timer to trigger_interval the queue has 3 nodes
217+
%% and at least locally stop_app works fast enough so that when the timer is triggered, the number of nodes
218+
%% will be changed not from on_node_down but from the global state.
219+
220+
timer:sleep(rabbit_ct_helpers:get_config(Config, shrink_timeout, 0)),
221+
189222
ok = rabbit_control_helper:command(stop_app, Server2),
190223
ok = rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_db_cluster, forget_member,
191224
[Server2, false]),
@@ -196,7 +229,27 @@ auto_shrink(Config) ->
196229
2 =:= length(M)
197230
end).
198231

232+
%% -------------------------------------------------------------------
233+
%% Helpers.
234+
%% -------------------------------------------------------------------
199235

236+
merge_app_env(Config) ->
237+
rabbit_ct_helpers:merge_app_env(
238+
rabbit_ct_helpers:merge_app_env(Config,
239+
{rabbit, [{core_metrics_gc_interval, 100}]}),
240+
{ra, [{min_wal_roll_over_interval, 30000}]}).
241+
242+
reset_nodes([], _Leader) ->
243+
ok;
244+
reset_nodes([Node| Nodes], Leader) ->
245+
ok = rabbit_control_helper:command(stop_app, Node),
246+
case rabbit_control_helper:command(forget_cluster_node, Leader, [atom_to_list(Node)]) of
247+
ok -> ok;
248+
{error, _, <<"Error:\n{:not_a_cluster_node, ~c\"The node selected is not in the cluster.\"}">>} -> ok
249+
end,
250+
ok = rabbit_control_helper:command(reset, Node),
251+
ok = rabbit_control_helper:command(start_app, Node),
252+
reset_nodes(Nodes, Leader).
200253

201254
add_server_to_cluster(Server, Leader) ->
202255
ok = rabbit_control_helper:command(stop_app, Server),

0 commit comments

Comments
 (0)