
Commit e4b4847

chore: avoid squashing when squashing_current_reply_size crosses limit (#4924)
* add flag squashed_reply_size_limit
* disable squashing when squashing_current_reply_size crosses flag limit
* add test

Signed-off-by: kostas <[email protected]>
1 parent d3d222d commit e4b4847
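
The gist of the change: before squashing a pipeline, the connection now consults a thread-local byte counter tracking how much reply data squashed pipelines currently hold, and skips squashing once a configured limit is crossed. A minimal compilable sketch of that gating check, using simplified stand-ins for the facade stats (the real code reads tl_facade_stats->reply_stats.squashing_current_reply_size, as in the diff below):

#include <atomic>
#include <cstddef>
#include <iostream>

// Simplified stand-ins for the thread-local facade stats used in the diff.
thread_local std::atomic<std::size_t> squashing_current_reply_size{0};
thread_local const std::size_t reply_size_limit = 500'000'000;  // 0 disables the check

// Mirrors the shape of Connection::IsReplySizeOverLimit(): a limit of 0 means
// "never throttle"; otherwise squashing is skipped once the bytes held by
// in-flight squashed replies exceed the limit.
bool IsReplySizeOverLimit() {
  std::size_t current = squashing_current_reply_size.load(std::memory_order_acquire);
  return reply_size_limit != 0 && current > reply_size_limit;
}

int main() {
  squashing_current_reply_size.store(600'000'000, std::memory_order_release);
  std::cout << std::boolalpha << IsReplySizeOverLimit() << '\n';  // prints: true
}

When the check fires, the dispatch loop falls back to the non-squashed path and processes the pipelined commands one message at a time, which bounds how much reply memory a single batch can pin at once.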

File tree

3 files changed: +60 -5 lines changed

src/facade/dragonfly_connection.cc

Lines changed: 21 additions & 4 deletions
@@ -98,6 +98,10 @@ ABSL_FLAG(uint32_t, max_busy_read_usec, 100,
           "Maximum time we read and parse from "
           "a socket without yielding. In microseconds.");
 
+ABSL_FLAG(size_t, squashed_reply_size_limit, 0,
+          "Max bytes allowed for squashing_current_reply_size. If this limit is reached, "
+          "connections dispatching pipelines won't squash them.");
+
 using namespace util;
 using namespace std;
 using absl::GetFlag;
@@ -180,6 +184,8 @@ bool TrafficLogger::Write(iovec* blobs, size_t len) {
 thread_local TrafficLogger tl_traffic_logger{};
 thread_local base::Histogram* io_req_size_hist = nullptr;
 
+thread_local const size_t reply_size_limit = absl::GetFlag(FLAGS_squashed_reply_size_limit);
+
 void OpenTrafficLogger(string_view base_path) {
   unique_lock lk{tl_traffic_logger.mutex};
   if (tl_traffic_logger.log_file)
@@ -1158,7 +1164,7 @@ void Connection::DispatchSingle(bool has_more, absl::FunctionRef<void()> invoke_
     last_interaction_ = time(nullptr);
 
     // We might have blocked the dispatch queue from processing, wake it up.
-    if (dispatch_q_.size() > 0)
+    if (!dispatch_q_.empty())
       cnd_.notify_one();
   }
 }
@@ -1632,7 +1638,8 @@ void Connection::AsyncFiber() {
       bool squashing_enabled = squashing_threshold > 0;
       bool threshold_reached = pending_pipeline_cmd_cnt_ > squashing_threshold;
       bool are_all_plain_cmds = pending_pipeline_cmd_cnt_ == dispatch_q_.size();
-      if (squashing_enabled && threshold_reached && are_all_plain_cmds && !skip_next_squashing_) {
+      if (squashing_enabled && threshold_reached && are_all_plain_cmds && !skip_next_squashing_ &&
+          !IsReplySizeOverLimit()) {
         SquashPipeline();
       } else {
         MessageHandle msg = std::move(dispatch_q_.front());
@@ -2059,6 +2066,16 @@ void Connection::DecrNumConns() {
     --stats_->num_conns_other;
 }
 
+bool Connection::IsReplySizeOverLimit() const {
+  std::atomic<size_t>& reply_sz = tl_facade_stats->reply_stats.squashing_current_reply_size;
+  size_t current = reply_sz.load(std::memory_order_acquire);
+  const bool over_limit = reply_size_limit != 0 && current > 0 && current > reply_size_limit;
+  // Every 10 seconds. Otherwise, it can be too sensitive on certain workloads in production
+  // instances.
+  LOG_EVERY_N(INFO, 10) << "MultiCommandSquasher overlimit: " << current << "/" << reply_size_limit;
+  return over_limit;
+}
+
 void Connection::SetMaxQueueLenThreadLocal(unsigned tid, uint32_t val) {
   thread_queue_backpressure[tid].pipeline_queue_max_len = val;
   thread_queue_backpressure[tid].pipeline_cnd.notify_all();
@@ -2089,7 +2106,7 @@ void Connection::EnsureMemoryBudget(unsigned tid) {
 
 Connection::WeakRef::WeakRef(std::shared_ptr<Connection> ptr, unsigned thread_id,
                              uint32_t client_id)
-    : ptr_{ptr}, thread_id_{thread_id}, client_id_{client_id} {
+    : ptr_{std::move(ptr)}, thread_id_{thread_id}, client_id_{client_id} {
 }
 
 unsigned Connection::WeakRef::Thread() const {
@@ -2115,7 +2132,7 @@ uint32_t Connection::WeakRef::GetClientId() const {
   return client_id_;
 }
 
-bool Connection::WeakRef::operator<(const WeakRef& other) {
+bool Connection::WeakRef::operator<(const WeakRef& other) const {
   return client_id_ < other.client_id_;
 }
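
For context, not part of this diff: the counter consulted by IsReplySizeOverLimit() has to be incremented while squashed replies are built and decremented once they are flushed; the log tag suggests that bookkeeping lives in MultiCommandSquasher. A hedged sketch of the accounting pattern, with hypothetical names (the guard class below is illustrative, not Dragonfly's actual API):

#include <atomic>
#include <cstddef>

// Hypothetical thread-local counter standing in for
// reply_stats.squashing_current_reply_size.
thread_local std::atomic<std::size_t> squashing_current_reply_size{0};

// Illustrative RAII guard: charges a captured reply's size on construction
// and releases it once the batched reply has been written to the socket.
class ReplySizeCharge {
 public:
  explicit ReplySizeCharge(std::size_t bytes) : bytes_(bytes) {
    squashing_current_reply_size.fetch_add(bytes_, std::memory_order_acq_rel);
  }
  ~ReplySizeCharge() {
    squashing_current_reply_size.fetch_sub(bytes_, std::memory_order_acq_rel);
  }
  ReplySizeCharge(const ReplySizeCharge&) = delete;
  ReplySizeCharge& operator=(const ReplySizeCharge&) = delete;

 private:
  std::size_t bytes_;
};

With accounting of this shape, the new check in AsyncFiber() simply refuses to start another squashed batch while previously captured replies still pin too many bytes, rather than cancelling work already in flight.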

src/facade/dragonfly_connection.h

Lines changed: 3 additions & 1 deletion
@@ -196,7 +196,7 @@ class Connection : public util::Connection {
   // Returns client id. Thread-safe.
   uint32_t GetClientId() const;
 
-  bool operator<(const WeakRef& other);
+  bool operator<(const WeakRef& other) const;
   bool operator==(const WeakRef& other) const;
 
  private:
@@ -420,6 +420,8 @@ class Connection : public util::Connection {
   void IncrNumConns();
   void DecrNumConns();
 
+  bool IsReplySizeOverLimit() const;
+
   std::deque<MessageHandle> dispatch_q_;  // dispatch queue
   util::fb2::CondVarAny cnd_;             // dispatch queue waker
   util::fb2::Fiber async_fb_;             // async fiber (if started)

tests/dragonfly/memory_test.py

Lines changed: 36 additions & 0 deletions
@@ -1,4 +1,5 @@
 import pytest
+import asyncio
 from redis import asyncio as aioredis
 from .utility import *
 import logging
@@ -222,3 +223,38 @@ async def test_cache_eviction_with_rss_deny_oom(
     )
     stats_info = await async_client.info("stats")
     logging.info(f'Current evicted: {stats_info["evicted_keys"]}. Total keys: {num_keys}.')
+
+
+@pytest.mark.asyncio
+async def test_throttle_on_commands_squashing_replies_bytes(df_factory: DflyInstanceFactory):
+    df = df_factory.create(
+        proactor_threads=2,
+        squashed_reply_size_limit=500_000_000,
+        vmodule="dragonfly_connection=2",
+    )
+    df.start()
+
+    client = df.client()
+    # 0.5gb
+    await client.execute_command("debug populate 64 test 3125 rand type hash elements 500")
+
+    async def poll():
+        # At any point we should not cross this limit
+        assert df.rss < 1_500_000_000
+        cl = df.client()
+        pipe = cl.pipeline(transaction=False)
+        for i in range(64):
+            pipe.execute_command(f"hgetall test:{i}")
+
+        await pipe.execute()
+
+    tasks = []
+    for i in range(20):
+        tasks.append(asyncio.create_task(poll()))
+
+    for task in tasks:
+        await task
+
+    df.stop()
+    found = df.find_in_logs("MultiCommandSquasher overlimit: ")
+    assert len(found) > 0
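
A note on what the test exercises: it populates roughly 0.5 GB of hash data, then fires 20 concurrent pipelines of 64 HGETALL commands each, so the squashed replies quickly cross the 500 MB squashed_reply_size_limit. The assertions check two things: RSS stays bounded (under 1.5 GB) while the limit throttles squashing, and the "MultiCommandSquasher overlimit: " log line added in this commit actually appears. On a server started from the command line, the same knob would presumably be set as the startup flag --squashed_reply_size_limit (standard Abseil flag syntax), with the default of 0 leaving squashing unthrottled.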
