Skip to content

Commit e4b0ea7

Browse files
authored
Merge pull request ClickHouse#79670 from ClickHouse/hanfei/refine-keeper-oom-exception
refine the exception message when keeper reaches soft memory limit
2 parents b8d7ddb + 55966d9 commit e4b0ea7

File tree

8 files changed

+20
-11
lines changed

8 files changed

+20
-11
lines changed

src/Common/ZooKeeper/IKeeper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ const char * errorMessage(Error code)
8080
case Error::ZOPERATIONTIMEOUT: return "Operation timeout";
8181
case Error::ZBADARGUMENTS: return "Bad arguments";
8282
case Error::ZINVALIDSTATE: return "Invalid zhandle state";
83+
case Error::ZOUTOFMEMORY: return "Out of Memory";
8384
case Error::ZAPIERROR: return "API error";
8485
case Error::ZNONODE: return "No node";
8586
case Error::ZNOAUTH: return "Not authenticated";

src/Common/ZooKeeper/IKeeper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ enum class Error : int32_t
8282
ZOPERATIONTIMEOUT = -7, /// Operation timeout
8383
ZBADARGUMENTS = -8, /// Invalid arguments
8484
ZINVALIDSTATE = -9, /// Invalid zhandle state
85+
ZOUTOFMEMORY = -10, /// Keeper has reached soft memory limit
8586

8687
/** API errors.
8788
* This is never thrown by the server, it shouldn't be used other than

src/Coordination/KeeperDispatcher.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ void KeeperDispatcher::requestThread()
173173
ReadableSize(total_memory_tracker.get()),
174174
ReadableSize(total_memory_tracker.getRSS()),
175175
request.request->getOpNum());
176-
addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS);
176+
addErrorResponses({request}, Coordination::Error::ZOUTOFMEMORY);
177177
continue;
178178
}
179179

tests/integration/test_keeper_memory_soft_limit/configs/keeper_config1.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
<value>az-zoo1</value>
1616
</availability_zone>
1717
<server_id>1</server_id>
18-
<max_memory_usage_soft_limit>200000000</max_memory_usage_soft_limit>
18+
<max_memory_usage_soft_limit>80000000</max_memory_usage_soft_limit>
1919

2020
<coordination_settings>
2121
<operation_timeout_ms>10000</operation_timeout_ms>

tests/integration/test_keeper_memory_soft_limit/configs/keeper_config2.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
<value>az-zoo2</value>
1717
<enable_auto_detection_on_cloud>1</enable_auto_detection_on_cloud>
1818
</availability_zone>
19-
<max_memory_usage_soft_limit>200000000</max_memory_usage_soft_limit>
19+
<max_memory_usage_soft_limit>80000000</max_memory_usage_soft_limit>
2020

2121
<coordination_settings>
2222
<operation_timeout_ms>10000</operation_timeout_ms>

tests/integration/test_keeper_memory_soft_limit/configs/keeper_config3.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
<tcp_port>2181</tcp_port>
1414
<server_id>3</server_id>
1515

16-
<max_memory_usage_soft_limit>200000000</max_memory_usage_soft_limit>
16+
<max_memory_usage_soft_limit>80000000</max_memory_usage_soft_limit>
1717

1818
<coordination_settings>
1919
<operation_timeout_ms>10000</operation_timeout_ms>
tests/integration/test_keeper_memory_soft_limit/configs/setting.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<clickhouse>
2+
<allow_zookeeper_write>1</allow_zookeeper_write>
3+
</clickhouse>

tests/integration/test_keeper_memory_soft_limit/test.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
import pytest
66
from kazoo.client import KazooClient
7-
from kazoo.exceptions import ConnectionLoss
8-
97
from helpers.cluster import ClickHouseCluster
108

119
cluster = ClickHouseCluster(__file__, keeper_config_dir="configs/")
@@ -16,6 +14,7 @@
1614
stay_alive=True,
1715
with_zookeeper=True,
1816
with_remote_database_disk=False, # Disable `with_remote_database_disk` as the test does not use the default Keeper.
17+
main_configs=["configs/setting.xml"],
1918
)
2019

2120

@@ -44,23 +43,28 @@ def started_cluster():
4443

4544

4645
def test_soft_limit_create(started_cluster):
46+
if node.is_built_with_sanitizer():
47+
pytest.skip("Disabled for sanitizers")
4748
started_cluster.wait_zookeeper_to_start()
49+
node_zk = get_connection_zk("zoo1")
4850
try:
49-
node_zk = get_connection_zk("zoo1")
5051
loop_time = 100000
5152
node_zk.create("/test_soft_limit", b"abc")
53+
path = "/test_soft_limit"
5254

5355
for i in range(loop_time):
54-
node_zk.create(
55-
"/test_soft_limit/node_" + str(i), random_string(1000).encode()
56-
)
57-
except ConnectionLoss:
56+
name = "node_" + str(i)
57+
node.query(f"INSERT INTO system.zookeeper (name, path, value) values ('{name}', '{path}', repeat('a', 3000))")
58+
except Exception as e:
59+
# the message contains out of memory so the users will not be confused.
60+
assert 'out of memory' in str(e).lower()
5861
txn = node_zk.transaction()
5962
for i in range(10):
6063
txn.delete("/test_soft_limit/node_" + str(i))
6164

6265
txn.create("/test_soft_limit/node_1000001" + str(i), b"abcde")
6366
txn.commit()
67+
assert "0\n" == node.query("select sum(ProfileEvent_ZooKeeperHardwareExceptions) from system.metric_log")
6468
return
6569

6670
raise Exception("all records are inserted but no error occurs")

0 commit comments

Comments
 (0)