Skip to content

Commit 060ec35

Browse files
sundb and oranagra authored
Gradually reduce defrag CPU usage when defragmentation is ineffective (redis#13752)
This PR addresses an issue where, if a module does not provide a defragmentation callback, the fragmentation it generates cannot be defragmented. The defragmentation process nevertheless still sees a large amount of fragmentation, which leads to more aggressive defragmentation efforts that ultimately have no effect. To mitigate this, the PR introduces a mechanism that gradually reduces the CPU consumption of defragmentation when its effectiveness is poor. Effectiveness is considered poor when the fragmentation rate drops by less than 2% and the hit ratio is less than 1%, or when the fragmentation rate increases by no more than 2%. The CPU consumption is decreased gradually until it reaches the minimum threshold defined by `active-defrag-cycle-min`. --------- Co-authored-by: oranagra <[email protected]>
1 parent 810eacd commit 060ec35

File tree

2 files changed

+126
-7
lines changed

2 files changed

+126
-7
lines changed

src/defrag.c

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "server.h"
1616
#include <stddef.h>
17+
#include <math.h>
1718

1819
#ifdef HAVE_DEFRAG
1920

@@ -1024,7 +1025,7 @@ int defragLaterStep(redisDb *db, int slot, long long endtime) {
10241025
/* Clamp y to the inclusive range [min, max]: yields min when y is below min,
 * max when y is above max, and y itself otherwise. Arguments may be evaluated
 * more than once, so do not pass expressions with side effects. */
#define LIMIT(y, min, max) \
    (((y) < (min)) ? (min) : (((y) > (max)) ? (max) : (y)))
10251026

10261027
/* decide if defrag is needed, and at what CPU effort to invest in it */
1027-
void computeDefragCycles(void) {
1028+
void computeDefragCycles(float decay_rate) {
10281029
size_t frag_bytes;
10291030
float frag_pct = getAllocatorFragmentation(&frag_bytes);
10301031
/* If we're not already running, and below the threshold, exit. */
@@ -1040,6 +1041,7 @@ void computeDefragCycles(void) {
10401041
server.active_defrag_threshold_upper,
10411042
server.active_defrag_cycle_min,
10421043
server.active_defrag_cycle_max);
1044+
cpu_pct *= decay_rate;
10431045
cpu_pct = LIMIT(cpu_pct,
10441046
server.active_defrag_cycle_min,
10451047
server.active_defrag_cycle_max);
@@ -1068,7 +1070,9 @@ void activeDefragCycle(void) {
10681070
static int defrag_stage = 0;
10691071
static unsigned long defrag_cursor = 0;
10701072
static redisDb *db = NULL;
1071-
static long long start_scan, start_stat;
1073+
static long long start_scan, start_hits, start_misses;
1074+
static float start_frag_pct;
1075+
static float decay_rate = 1.0f;
10721076
unsigned int iterations = 0;
10731077
unsigned long long prev_defragged = server.stat_active_defrag_hits;
10741078
unsigned long long prev_scanned = server.stat_active_defrag_scanned;
@@ -1104,13 +1108,13 @@ void activeDefragCycle(void) {
11041108
/* Once a second, check if the fragmentation justifies starting a scan
11051109
* or making it more aggressive. */
11061110
run_with_period(1000) {
1107-
computeDefragCycles();
1111+
computeDefragCycles(decay_rate);
11081112
}
11091113

11101114
/* Normally it is checked once a second, but when there is a configuration
11111115
* change, we want to check it as soon as possible. */
11121116
if (server.active_defrag_configuration_changed) {
1113-
computeDefragCycles();
1117+
computeDefragCycles(decay_rate);
11141118
server.active_defrag_configuration_changed = 0;
11151119
}
11161120

@@ -1148,7 +1152,7 @@ void activeDefragCycle(void) {
11481152
float frag_pct = getAllocatorFragmentation(&frag_bytes);
11491153
serverLog(LL_VERBOSE,
11501154
"Active defrag done in %dms, reallocated=%d, frag=%.0f%%, frag_bytes=%zu",
1151-
(int)((now - start_scan)/1000), (int)(server.stat_active_defrag_hits - start_stat), frag_pct, frag_bytes);
1155+
(int)((now - start_scan)/1000), (int)(server.stat_active_defrag_hits - start_hits), frag_pct, frag_bytes);
11521156

11531157
start_scan = now;
11541158
current_db = -1;
@@ -1159,17 +1163,36 @@ void activeDefragCycle(void) {
11591163
db = NULL;
11601164
server.active_defrag_running = 0;
11611165

1166+
long long last_hits = server.stat_active_defrag_hits - start_hits;
1167+
long long last_misses = server.stat_active_defrag_misses - start_misses;
1168+
float last_frag_pct_change = start_frag_pct - frag_pct;
1169+
/* When defragmentation efficiency is low, we gradually reduce the
1170+
* speed for the next cycle to avoid CPU waste. However, in the
1171+
* following two cases, we keep the normal speed:
1172+
* 1) If the fragmentation percentage has increased or decreased by more than 2%.
1173+
* 2) If the fragmentation percentage decrease is small, but hits are above 1%,
1174+
* we still keep the normal speed. */
1175+
if (fabs(last_frag_pct_change) > 2 ||
1176+
(last_frag_pct_change < 0 && last_hits >= (last_hits + last_misses) * 0.01))
1177+
{
1178+
decay_rate = 1.0f;
1179+
} else {
1180+
decay_rate *= 0.9;
1181+
}
1182+
11621183
moduleDefragEnd();
11631184

1164-
computeDefragCycles(); /* if another scan is needed, start it right away */
1185+
computeDefragCycles(decay_rate); /* if another scan is needed, start it right away */
11651186
if (server.active_defrag_running != 0 && ustime() < endtime)
11661187
continue;
11671188
break;
11681189
}
11691190
else if (current_db==0) {
11701191
/* Start a scan from the first database. */
11711192
start_scan = ustime();
1172-
start_stat = server.stat_active_defrag_hits;
1193+
start_hits = server.stat_active_defrag_hits;
1194+
start_misses = server.stat_active_defrag_misses;
1195+
start_frag_pct = getAllocatorFragmentation(NULL);
11731196
}
11741197

11751198
db = &server.db[current_db];

tests/unit/moduleapi/datatype.tcl

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,4 +136,100 @@ start_server {tags {"modules"}} {
136136

137137
assert_equal 1 [llength $keys]
138138
}
139+
140+
if {[string match {*jemalloc*} [s mem_allocator]] && [r debug mallctl arenas.page] <= 8192} {
141+
test {Reduce defrag CPU usage when module data can't be defragged} {
142+
r flushdb
143+
r config set hz 100
144+
r config set activedefrag no
145+
r config set active-defrag-threshold-lower 5
146+
r config set active-defrag-cycle-min 25
147+
r config set active-defrag-cycle-max 75
148+
r config set active-defrag-ignore-bytes 100kb
149+
150+
# Populate memory with interleaving field of same size.
151+
set n 20000
152+
set dummy "[string repeat x 400]"
153+
set rd [redis_deferring_client]
154+
for {set i 0} {$i < $n} {incr i} { $rd datatype.set k$i 1 $dummy }
155+
for {set i 0} {$i < [expr $n]} {incr i} { $rd read } ;# Discard replies
156+
157+
after 120 ;# serverCron only updates the info once in 100ms
158+
if {$::verbose} {
159+
puts "used [s allocator_allocated]"
160+
puts "rss [s allocator_active]"
161+
puts "frag [s allocator_frag_ratio]"
162+
puts "frag_bytes [s allocator_frag_bytes]"
163+
}
164+
assert_lessthan [s allocator_frag_ratio] 1.05
165+
166+
for {set i 0} {$i < $n} {incr i 2} { $rd del k$i }
167+
for {set j 0} {$j < $n} {incr j 2} { $rd read } ; # Discard del replies
168+
after 120 ;# serverCron only updates the info once in 100ms
169+
assert_morethan [s allocator_frag_ratio] 1.4
170+
171+
catch {r config set activedefrag yes} e
172+
if {[r config get activedefrag] eq "activedefrag yes"} {
173+
# wait for the active defrag to start working (decision once a second)
174+
wait_for_condition 50 100 {
175+
[s total_active_defrag_time] ne 0
176+
} else {
177+
after 120 ;# serverCron only updates the info once in 100ms
178+
puts [r info memory]
179+
puts [r info stats]
180+
puts [r memory malloc-stats]
181+
fail "defrag not started."
182+
}
183+
assert_morethan [s allocator_frag_ratio] 1.4
184+
185+
# The cpu usage of defragment will drop to active-defrag-cycle-min
186+
wait_for_condition 1000 50 {
187+
[s active_defrag_running] == 25
188+
} else {
189+
fail "Unable to reduce the defragmentation speed."
190+
}
191+
192+
# Fuzzy test to restore defragmentation speed to normal
193+
set end_time [expr {[clock seconds] + 10}]
194+
set speed_restored 0
195+
while {[clock seconds] < $end_time} {
196+
switch [expr {int(rand() * 3)}] {
197+
0 {
198+
# Randomly delete a key
199+
set random_key [r RANDOMKEY]
200+
if {$random_key != ""} {
201+
r DEL $random_key
202+
}
203+
}
204+
1 {
205+
# Randomly overwrite a key
206+
set random_key [r RANDOMKEY]
207+
if {$random_key != ""} {
208+
r datatype.set $random_key 1 $dummy
209+
}
210+
}
211+
2 {
212+
# Randomly generate a new key
213+
set random_key "key_[expr {int(rand() * 10000)}]"
214+
r datatype.set $random_key 1 $dummy
215+
}
216+
}
217+
218+
# Wait for defragmentation speed to restore.
219+
if {[s active_defrag_running] > 25} {
220+
set speed_restored 1
221+
break;
222+
}
223+
}
224+
assert_equal $speed_restored 1
225+
226+
# After the traffic disappears, the defragmentation speed will decrease again.
227+
wait_for_condition 1000 50 {
228+
[s active_defrag_running] == 25
229+
} else {
230+
fail "Unable to reduce the defragmentation speed after traffic disappears."
231+
}
232+
}
233+
}
234+
}
139235
}

0 commit comments

Comments
 (0)