Skip to content

Commit 455a5a2

Browse files
committed
rabbitmq_federation: Add testcase for #10306
[Why] An upgrade scenario going from RabbitMQ 3.12.x to the upcoming 3.13.0 was shared in issue #10306 to demonstrate that the change of child ID format broke rolling upgrades when there are existing federated exchanges. [How] The testcase uses 5 nodes: one upstream node, two "old" downstream nodes, and two "new" downstream nodes. The old downstream nodes are used to prepare a 2-node cluster that is about to be upgraded. The new downstream nodes are then added to the cluster and the old downstream nodes are stopped to simulate a rolling upgrade. The child ID format was restored in the previous commit, thus there is no conversion to handle and the testcase should just work with a fresh 3.13.0+ cluster or with a mixed-version cluster with 3.12.x. The testcase did fail while the previous commit was being prepared, which confirmed that it is effective at catching the regression.
1 parent 92ca64e commit 455a5a2

File tree

2 files changed

+123
-2
lines changed

2 files changed

+123
-2
lines changed

deps/rabbitmq_federation/BUILD.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ rabbitmq_integration_suite(
9696
"test/rabbit_federation_test_util.beam",
9797
],
9898
flaky = True,
99-
shard_count = 2,
99+
shard_count = 3,
100100
)
101101

102102
rabbitmq_integration_suite(

deps/rabbitmq_federation/test/exchange_SUITE.erl

Lines changed: 122 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,15 @@
3030
all() ->
3131
[
3232
{group, essential},
33-
{group, cluster_size_3}
33+
{group, cluster_size_3},
34+
{group, rolling_upgrade}
3435
].
3536

3637
groups() ->
3738
[
3839
{essential, [], essential()},
3940
{cluster_size_3, [], [max_hops]},
41+
{rolling_upgrade, [], [child_id_format]},
4042
{cycle_protection, [], [
4143
%% TBD: port from v3.10.x in an Erlang 25-compatible way
4244
]},
@@ -96,6 +98,12 @@ init_per_group(cluster_size_3 = Group, Config) ->
9698
{rmq_nodes_count, 3}
9799
]),
98100
init_per_group1(Group, Config1);
101+
init_per_group(rolling_upgrade = Group, Config) ->
102+
Config1 = rabbit_ct_helpers:set_config(Config, [
103+
{rmq_nodes_count, 5},
104+
{rmq_nodes_clustered, false}
105+
]),
106+
init_per_group1(Group, Config1);
99107
init_per_group(Group, Config) ->
100108
init_per_group1(Group, Config).
101109

@@ -539,6 +547,119 @@ lookup_exchange_status(Config) ->
539547
[key, uri, status, timestamp, id, supervisor, upstream]),
540548

541549
clean_up_federation_related_bits(Config).
550+
551+
%% Testcase for the rolling-upgrade scenario of issue #10306.
%%
%% It checks that the child ID used by
%% `rabbit_federation_exchange_link_sup_sup' for a federated exchange is the
%% `#exchange{}' record, both on the "old" nodes and on the "new" nodes that
%% replace them, and that the exchange can still be deleted once only the
%% new nodes remain.
%%
%% The testcase expects exactly 5 non-clustered nodes in `Config' (the
%% pattern match on the node list crashes otherwise).
%%
%% Returns the result of closing the last connection on success, or
%% `{skip, Reason}' if the old nodes use the transient `{List, #exchange{}}'
%% child ID format.
child_id_format(Config) ->
    [UpstreamNode,
     OldNodeA,
     NewNodeB,
     OldNodeC,
     NewNodeD] = rabbit_ct_broker_helpers:get_node_configs(
                   Config, nodename),

    %% Create a cluster with the nodes running the old version of RabbitMQ in
    %% mixed-version testing.
    %%
    %% Note: we build this on the assumption that `rabbit_ct_broker_helpers'
    %% starts nodes this way:
    %%   Node 1: the primary copy of RabbitMQ the test is started from
    %%   Node 2: the secondary umbrella (if any)
    %%   Node 3: the primary copy
    %%   Node 4: the secondary umbrella
    %%   ...
    %%
    %% Therefore, `UpstreamNode' will use the primary copy, `OldNodeA' the
    %% secondary umbrella, `NewNodeB' the primary copy, and so on.
    Config1 = rabbit_ct_broker_helpers:cluster_nodes(
                Config, [OldNodeA, OldNodeC]),

    %% Prepare the whole federated exchange on that old cluster.
    UpstreamName = <<"fed_on_upgrade">>,
    rabbit_ct_broker_helpers:set_parameter(
      Config1, OldNodeA, <<"federation-upstream">>, UpstreamName,
      [
       {<<"uri">>, rabbit_ct_broker_helpers:node_uri(Config1, UpstreamNode)}
      ]),

    rabbit_ct_broker_helpers:set_policy(
      Config1, OldNodeA,
      <<"fed_on_upgrade_policy">>, <<"^fed_">>, <<"all">>,
      [
       {<<"federation-upstream-pattern">>, UpstreamName}
      ]),

    XName = <<"fed_ex_on_upgrade_cluster">>,
    X = exchange_declare_method(XName, <<"direct">>),
    {Conn1, Ch1} = rabbit_ct_client_helpers:open_connection_and_channel(
                     Config1, OldNodeA),
    ?assertEqual({'exchange.declare_ok'}, declare_exchange(Ch1, X)),
    rabbit_ct_client_helpers:close_channel(Ch1),
    rabbit_ct_client_helpers:close_connection(Conn1),

    %% Verify the format of the child ID. In the main branch, the format was
    %% temporarily a size-2 tuple with a list as the first element. This was
    %% not kept later and the original ID format is used in old and new nodes.
    [{Id, _, _, _}] = rabbit_ct_broker_helpers:rpc(
                        Config1, OldNodeA,
                        mirrored_supervisor, which_children,
                        [rabbit_federation_exchange_link_sup_sup]),
    case Id of
        %% This is the format we expect everywhere.
        #exchange{name = #resource{name = XName}} ->
            %% Verify that the supervisors exist on all nodes.
            lists:foreach(
              fun(Node) ->
                      assert_exchange_link_sup_child(Config1, Node, XName)
              end, [OldNodeA, OldNodeC]),

            %% Simulate a rolling upgrade by:
            %% 1. adding new nodes to the old cluster
            %% 2. stopping the old nodes
            %%
            %% After that, the supervisors run on the new code.
            Config2 = rabbit_ct_broker_helpers:cluster_nodes(
                        Config1, [OldNodeA, NewNodeB, NewNodeD]),
            %% Each old node is stopped then reset, one after the other. The
            %% config returned by the last clustering step (`Config2') is
            %% used for every call, for consistency.
            lists:foreach(
              fun(OldNode) ->
                      ok = rabbit_ct_broker_helpers:stop_broker(
                             Config2, OldNode),
                      ok = rabbit_ct_broker_helpers:reset_node(
                             Config2, OldNode)
              end, [OldNodeA, OldNodeC]),

            %% Verify that the supervisors still use the same IDs.
            lists:foreach(
              fun(Node) ->
                      assert_exchange_link_sup_child(Config2, Node, XName)
              end, [NewNodeB, NewNodeD]),

            %% Delete the exchange: it should work because the ID format is the
            %% one expected.
            %%
            %% During the transient period where the ID format was changed,
            %% this would crash with a badmatch because the running
            %% supervisor's ID would not match the content of the database.
            {Conn2, Ch2} = rabbit_ct_client_helpers:open_connection_and_channel(
                             Config2, NewNodeB),
            ?assertEqual({'exchange.delete_ok'}, delete_exchange(Ch2, XName)),
            rabbit_ct_client_helpers:close_channel(Ch2),
            rabbit_ct_client_helpers:close_connection(Conn2);

        %% This is the transient format we are not interested in as it only
        %% lived in a development branch.
        {List, #exchange{name = #resource{name = XName}}}
          when is_list(List) ->
            {skip, "Testcase skipped with the transiently changed ID format"}
    end.

%% Asserts that, on `Node', `rabbit_federation_exchange_link_sup_sup' has
%% exactly one child and that its ID is the `#exchange{}' record of the
%% exchange named `XName'.
assert_exchange_link_sup_child(Config, Node, XName) ->
    ?assertMatch(
       [{#exchange{name = #resource{name = XName}}, _, _, _}],
       rabbit_ct_broker_helpers:rpc(
         Config, Node,
         mirrored_supervisor, which_children,
         [rabbit_federation_exchange_link_sup_sup])).
662+
542663
%%
543664
%% Test helpers
544665
%%

0 commit comments

Comments
 (0)