Skip to content

Commit 18487c1

Browse files
committed
tests: attempt to fix RabbitMQ tests (pause/unpause breaks RabbitMQ)
I was looking at the CI failure [1]: all the problems show up there after the unpause of the rabbitmq container failed:

# dockerd.log
time="2025-05-21T11:07:28.863531720Z" level=debug msg="Calling POST /v1.46/containers/1f6103a506ad393e4ff79565ff27e01e73d760722a0412b66f3e0ac3759b2905/unpause"
time="2025-05-21T11:07:54.715714117Z" level=warning msg="Health check for container 1f6103a506ad393e4ff79565ff27e01e73d760722a0412b66f3e0ac3759b2905 error: timed out starting health check for container 1f6103a506ad393e4ff79565ff27e01e73d760722a0412b66f3e0ac3759b2905"

# pytest logs
cat test_storage_rabbitmq_test_py_0.jsonl | jq -r '[.nodeid, .outcome, .duration] | @TSV'
test_storage_rabbitmq/test.py::test_rabbitmq_no_connection_at_startup_1 passed 0.06497019900052692
test_storage_rabbitmq/test.py::test_rabbitmq_no_connection_at_startup_1 passed 4.069896273000268
test_storage_rabbitmq/test.py::test_rabbitmq_no_connection_at_startup_1 passed 2.9214494500010915
test_storage_rabbitmq/test.py::test_rabbitmq_no_connection_at_startup_2 passed 0.06504274200051441
test_storage_rabbitmq/test.py::test_rabbitmq_no_connection_at_startup_2 failed 305.5343935210003
test_storage_rabbitmq/test.py::test_rabbitmq_no_connection_at_startup_2 failed 120.9480928060002
test_storage_rabbitmq/test.py::test_rabbitmq_format_factory_settings passed 0.06524586100022134
test_storage_rabbitmq/test.py::test_rabbitmq_format_factory_settings failed 4.070239695000055
test_storage_rabbitmq/test.py::test_rabbitmq_format_factory_settings failed 120.14076365499932
test_storage_rabbitmq/test.py::test_rabbitmq_vhost passed 0.06514747200162674
test_storage_rabbitmq/test.py::test_rabbitmq_vhost failed 4.070600553000986
test_storage_rabbitmq/test.py::test_rabbitmq_vhost failed 120.16239366799891

As you can see, all tests timed out after the test_rabbitmq_no_connection_at_startup_2 failure.

So let's try to pause RabbitMQ properly, using stop_app/start_app instead of pausing and unpausing the container.
[1]: https://s3.amazonaws.com/clickhouse-test-reports/json.html?REF=master&sha=87a5d902c552e9bf1502023611f7341b665754e0&name_0=MasterCI&name_1=Integration%20tests%20%28aarch64%2C%20distributed%20plan%2C%202%2F4%29
1 parent f7b89bd commit 18487c1

File tree

3 files changed

+25
-29
lines changed

3 files changed

+25
-29
lines changed

tests/integration/helpers/cluster.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2480,7 +2480,20 @@ def start_rabbitmq_app(self, timeout=120):
24802480
run_rabbitmqctl(
24812481
self.rabbitmq_docker_id, self.rabbitmq_cookie, "start_app", timeout
24822482
)
2483-
self.wait_rabbitmq_to_start()
2483+
self.wait_rabbitmq_to_start(timeout)
2484+
2485+
@contextmanager
2486+
def pause_rabbitmq(self, monitor=None, timeout=120):
2487+
if monitor is not None:
2488+
monitor.stop()
2489+
self.stop_rabbitmq_app(timeout)
2490+
2491+
try:
2492+
yield
2493+
finally:
2494+
self.start_rabbitmq_app(timeout)
2495+
if monitor is not None:
2496+
monitor.start(self)
24842497

24852498
def reset_rabbitmq(self, timeout=120):
24862499
self.stop_rabbitmq_app()

tests/integration/test_storage_rabbitmq/test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2210,7 +2210,7 @@ def test_rabbitmq_no_connection_at_startup_2(rabbitmq_cluster):
22102210
)
22112211
instance.query("DETACH TABLE test.cs")
22122212

2213-
with rabbitmq_cluster.pause_container("rabbitmq1"):
2213+
with rabbitmq_cluster.pause_rabbitmq():
22142214
instance.query("ATTACH TABLE test.cs")
22152215

22162216
messages_num = 1000

tests/integration/test_storage_rabbitmq/test_failed_connection.py

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -126,17 +126,6 @@ def stop(self):
126126
self.connection = None
127127

128128

129-
def suspend_rabbitmq(rabbitmq_cluster, rabbitmq_monitor):
130-
rabbitmq_monitor.stop()
131-
rabbitmq_cluster.stop_rabbitmq_app()
132-
133-
134-
def resume_rabbitmq(rabbitmq_cluster, rabbitmq_monitor):
135-
rabbitmq_cluster.start_rabbitmq_app()
136-
rabbitmq_cluster.wait_rabbitmq_to_start()
137-
rabbitmq_monitor.start(rabbitmq_cluster)
138-
139-
140129
# Fixtures
141130

142131
@pytest.fixture(scope="module")
@@ -234,14 +223,11 @@ def test_rabbitmq_restore_failed_connection_without_losses_1(rabbitmq_cluster, r
234223
else:
235224
pytest.fail(f"Time limit of {DEFAULT_TIMEOUT_SEC} seconds reached. The count is still 0.")
236225

237-
suspend_rabbitmq(rabbitmq_cluster, rabbitmq_monitor)
238-
239-
number = int(instance.query("SELECT count() FROM test.view"))
240-
logging.debug(f"{number}/{messages_num} after suspending RabbitMQ")
241-
if number == messages_num:
242-
pytest.fail("All RabbitMQ messages have been consumed before resuming the RabbitMQ server")
243-
244-
resume_rabbitmq(rabbitmq_cluster, rabbitmq_monitor)
226+
with rabbitmq_cluster.pause_rabbitmq(rabbitmq_monitor):
227+
number = int(instance.query("SELECT count() FROM test.view"))
228+
logging.debug(f"{number}/{messages_num} after suspending RabbitMQ")
229+
if number == messages_num:
230+
pytest.fail("All RabbitMQ messages have been consumed before resuming the RabbitMQ server")
245231

246232
deadline = time.monotonic() + CLICKHOUSE_VIEW_TIMEOUT_SEC
247233
while time.monotonic() < deadline:
@@ -325,14 +311,11 @@ def test_rabbitmq_restore_failed_connection_without_losses_2(rabbitmq_cluster, r
325311
else:
326312
pytest.fail(f"Time limit of {DEFAULT_TIMEOUT_SEC} seconds reached. The count is still 0.")
327313

328-
suspend_rabbitmq(rabbitmq_cluster, rabbitmq_monitor)
329-
330-
number = int(instance.query("SELECT count() FROM test.view"))
331-
logging.debug(f"{number}/{messages_num} after suspending RabbitMQ")
332-
if number == messages_num:
333-
pytest.fail("All RabbitMQ messages have been consumed before resuming the RabbitMQ server")
334-
335-
resume_rabbitmq(rabbitmq_cluster, rabbitmq_monitor)
314+
with rabbitmq_cluster.pause_rabbitmq(rabbitmq_monitor):
315+
number = int(instance.query("SELECT count() FROM test.view"))
316+
logging.debug(f"{number}/{messages_num} after suspending RabbitMQ")
317+
if number == messages_num:
318+
pytest.fail("All RabbitMQ messages have been consumed before resuming the RabbitMQ server")
336319

337320
# while int(instance.query('SELECT count() FROM test.view')) == 0:
338321
# time.sleep(0.1)

0 commit comments

Comments
 (0)