From 0d78796631cda2a20fb67563187ef8a8aaed2dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Mon, 3 Mar 2025 16:56:36 +0100 Subject: [PATCH 1/5] v5_SUITE: Close all connections in `end_per_testcase/2` [Why] Many tests do not clean up their connections if they encounter a failure. This affects subsequent testcases negatively. (cherry picked from commit 97da746160a7e1f8306991d24cd106a1e5595d98) (cherry picked from commit 5c0d16e436f59adb57e0773430a66ab304be4af5) --- deps/rabbitmq_mqtt/test/v5_SUITE.erl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/deps/rabbitmq_mqtt/test/v5_SUITE.erl b/deps/rabbitmq_mqtt/test/v5_SUITE.erl index a74cf0277bba..44a195094430 100644 --- a/deps/rabbitmq_mqtt/test/v5_SUITE.erl +++ b/deps/rabbitmq_mqtt/test/v5_SUITE.erl @@ -206,10 +206,27 @@ end_per_testcase(T, Config) -> end_per_testcase0(T, Config). end_per_testcase0(Testcase, Config) -> + %% Terminate all connections and wait for sessions to terminate before + %% starting the next test case. + _ = rabbit_ct_broker_helpers:rpc( + Config, 0, + rabbit_networking, close_all_connections, [<<"test finished">>]), + _ = rabbit_ct_broker_helpers:rpc_all( + Config, + rabbit_mqtt, close_local_client_connections, [normal]), + eventually(?_assertEqual( + [], + rpc(Config, rabbit_mqtt, local_connection_pids, []))), %% Assert that every testcase cleaned up their MQTT sessions. + rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, delete_queues, []), eventually(?_assertEqual([], rpc(Config, rabbit_amqqueue, list, []))), rabbit_ct_helpers:testcase_finished(Config, Testcase). +delete_queues() -> + _ = [catch rabbit_amqqueue:delete(Q, false, false, <<"test finished">>) + || Q <- rabbit_amqqueue:list()], + ok. 
+ %% ------------------------------------------------------------------- %% Testsuite cases %% ------------------------------------------------------------------- From df3c9e701d9fc69f56b017867962577e04792550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Tue, 25 Feb 2025 17:40:01 +0100 Subject: [PATCH 2/5] priority_queue_recovery_SUITE: Add suffix to RabbitMQ node names [Why] This helps debugging. (cherry picked from commit 28870f380ce8299ecaefd4e3fa1a9cd83bb98d10) (cherry picked from commit 3827f7b2422822ac2d9b4b20284ee099dc5c9e51) --- deps/rabbit/test/priority_queue_recovery_SUITE.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deps/rabbit/test/priority_queue_recovery_SUITE.erl b/deps/rabbit/test/priority_queue_recovery_SUITE.erl index 9d6e7599daa0..b8792056d23a 100644 --- a/deps/rabbit/test/priority_queue_recovery_SUITE.erl +++ b/deps/rabbit/test/priority_queue_recovery_SUITE.erl @@ -35,8 +35,10 @@ end_per_suite(Config) -> rabbit_ct_helpers:run_teardown_steps(Config). init_per_group(_, Config) -> + Suffix = rabbit_ct_helpers:testcase_absname(Config, "", "-"), Config1 = rabbit_ct_helpers:set_config(Config, [ - {rmq_nodes_count, 2} + {rmq_nodes_count, 2}, + {rmq_nodename_suffix, Suffix} ]), rabbit_ct_helpers:run_steps(Config1, rabbit_ct_broker_helpers:setup_steps() ++ From 5af5235b0c8e075d48d0252c94f87339a56b41af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Mon, 3 Mar 2025 10:48:43 +0100 Subject: [PATCH 3/5] logging_SUITE: Increase timetrap to 3 minutes [Why] We sometimes hit the 1-minute timetrap in CI even though the tests are running fine. 
(cherry picked from commit 43916da581a91fcb6b959cba71bed523daac2ac2) (cherry picked from commit 3e5c25485cf37c05e0e67aeb728b8be1b2593763) --- deps/rabbit/test/logging_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/rabbit/test/logging_SUITE.erl b/deps/rabbit/test/logging_SUITE.erl index 696d0b5cded5..5e89034a51d5 100644 --- a/deps/rabbit/test/logging_SUITE.erl +++ b/deps/rabbit/test/logging_SUITE.erl @@ -57,7 +57,7 @@ logging_to_syslog_works/1]). suite() -> - [{timetrap, {minutes, 1}}]. + [{timetrap, {minutes, 3}}]. all() -> [ From 3da990346b4aa4e7bdfb8ccc66f651acc5bf0c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Thu, 27 Feb 2025 13:24:57 +0100 Subject: [PATCH 4/5] rabbit_stream_SUITE: Increase some timeouts (cherry picked from commit 0e7f92aba2292ca117d664e7e67529f118a258ac) (cherry picked from commit c83ed19f3e9e9a14eceeb2f284f53f790cd5845a) --- .../src/test/java/com/rabbitmq/stream/FailureTest.java | 2 ++ .../src/test/java/com/rabbitmq/stream/LeaderLocatorTest.java | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/FailureTest.java b/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/FailureTest.java index 889bbcad45bf..e613bc06b558 100644 --- a/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/FailureTest.java +++ b/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/FailureTest.java @@ -221,6 +221,7 @@ void noLostConfirmedMessagesWhenLeaderGoesAway() throws Exception { () -> { connected.set(false); + try { Thread.sleep(2000); } catch (Exception e) {} Client locator = cf.get(new Client.ClientParameters().port(streamPortNode2())); // wait until there's a new leader @@ -467,6 +468,7 @@ void consumerReattachesToOtherReplicaWhenReplicaGoesAway() throws Exception { // avoid long-running 
task in the IO thread executorService.submit( () -> { + try { Thread.sleep(2000); } catch (Exception e) {} Client.StreamMetadata m = metadataClient.metadata(stream).get(stream); int newReplicaPort = m.getReplicas().get(0).getPort(); diff --git a/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/LeaderLocatorTest.java b/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/LeaderLocatorTest.java index ae573987f34f..edf07310e523 100644 --- a/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/LeaderLocatorTest.java +++ b/deps/rabbitmq_stream/test/rabbit_stream_SUITE_data/src/test/java/com/rabbitmq/stream/LeaderLocatorTest.java @@ -28,6 +28,7 @@ import com.rabbitmq.stream.impl.Client.Response; import com.rabbitmq.stream.impl.Client.StreamMetadata; import java.util.Collections; +import java.time.Duration; import java.util.HashMap; import java.util.Map; import java.util.Set; @@ -57,7 +58,9 @@ void invalidLocatorShouldReturnError() { void clientLocalLocatorShouldMakeLeaderOnConnectedNode() { int[] ports = new int[] {TestUtils.streamPortNode1(), TestUtils.streamPortNode2()}; for (int port : ports) { - Client client = cf.get(new Client.ClientParameters().port(port)); + Client client = cf.get(new Client.ClientParameters() + .port(port) + .rpcTimeout(Duration.ofSeconds(30))); String s = UUID.randomUUID().toString(); try { Response response = From a1744a78313a3266bbdbcc93fd801604b2c28efb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Wed, 26 Feb 2025 14:00:08 +0100 Subject: [PATCH 5/5] rabbitmq-run.mk: Stop node in `start-background-broker` in case of error [Why] The CLI sometimes crashes early because it fails to configure the Erlang distribution. Because we use two CLI commands to watch the start of RabbitMQ, if one of them fails, the Make recipe will exit with an error, leaving the RabbitMQ node running. 
[How] We use a shell trap to stop the node if the shell is about to exit with an error. While here, we retry the `await_startup` CLI command several times because this is the one failing the most. This retry is a temporary workaround until the crash is understood and a proper fix is committed. (cherry picked from commit 3a278e7e7c48f05fdacdf90018f201b08c281b1c) (cherry picked from commit 88958f3723e7f6ab7fa957e272d071af552cce13) --- deps/rabbit_common/mk/rabbitmq-run.mk | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/deps/rabbit_common/mk/rabbitmq-run.mk b/deps/rabbit_common/mk/rabbitmq-run.mk index 8bdff07d64a9..a02a626faf43 100644 --- a/deps/rabbit_common/mk/rabbitmq-run.mk +++ b/deps/rabbit_common/mk/rabbitmq-run.mk @@ -320,10 +320,13 @@ start-background-broker: node-tmpdir $(DIST_TARGET) $(BASIC_SCRIPT_ENV_SETTINGS) \ $(RABBITMQ_SERVER) \ $(REDIRECT_STDIO) & + trap 'test "$$?" = 0 || $(MAKE) stop-node' EXIT && \ ERL_LIBS="$(DIST_ERL_LIBS)" \ $(RABBITMQCTL) -n $(RABBITMQ_NODENAME) wait --timeout $(RMQCTL_WAIT_TIMEOUT) $(RABBITMQ_PID_FILE) && \ - ERL_LIBS="$(DIST_ERL_LIBS)" \ - $(RABBITMQCTL) --node $(RABBITMQ_NODENAME) await_startup + for i in $$(seq 1 10); do \ + ERL_LIBS="$(DIST_ERL_LIBS)" $(RABBITMQCTL) -n $(RABBITMQ_NODENAME) await_startup || sleep 1; \ + done && \ + ERL_LIBS="$(DIST_ERL_LIBS)" $(RABBITMQCTL) -n $(RABBITMQ_NODENAME) await_startup start-rabbit-on-node: $(exec_verbose) ERL_LIBS="$(DIST_ERL_LIBS)" \