@@ -31,7 +31,7 @@ ABSL_FLAG(float, mem_defrag_threshold, 0.7,
31
31
" Minimum percentage of used memory relative to maxmemory cap before running "
32
32
" defragmentation" );
33
33
34
// Flag: number of seconds between successive defragmentation necessity checks (see the help
// text below). This hunk changes the default from 10 (removed line) to 60 (added line).
// The value is read in EngineShard::DefragTaskState::CheckRequired, which returns false while
// fewer than this many seconds have elapsed since last_check_time.
- ABSL_FLAG (uint32_t , mem_defrag_check_sec_interval, 10 ,
34
+ ABSL_FLAG (uint32_t , mem_defrag_check_sec_interval, 60 ,
35
35
" Number of seconds between every defragmentation necessity check" );
36
36
37
37
ABSL_FLAG (float , mem_defrag_waste_threshold, 0.2 ,
@@ -70,24 +70,6 @@ namespace {
70
70
71
71
constexpr uint64_t kCursorDoneState = 0u ;
72
72
73
// Removed by this hunk: plain aggregate holding the three counters that
// zmalloc_get_allocator_wasted_blocks reports (filled in by ReadShardMemUsage further down).
// Every field defaults to zero. The spelling commited is the identifier used by the code.
// NOTE(review): units are not visible here, presumably bytes; confirm against the zmalloc helper.
- struct ShardMemUsage {
74
- std::size_t commited = 0 ;
75
- std::size_t used = 0 ;
76
- std::size_t wasted_mem = 0 ;
77
- };
78
-
79
// Removed by this hunk: stream-insertion helper for ShardMemUsage.
// Writes the commited, used and wasted_mem counters, in that order, as one text fragment and
// returns the same stream so insertions can be chained. Its visible use is the VLOG statement
// in CheckRequired that streams a ShardMemUsage value, which this commit also removes.
- std::ostream& operator <<(std::ostream& os, const ShardMemUsage& mem) {
80
- return os << " commited: " << mem.commited << " vs used " << mem.used << " , wasted memory "
81
- << mem.wasted_mem ;
82
- }
83
-
84
// Removed by this hunk: takes a one-shot reading of allocator usage.
// Default-constructs a ShardMemUsage (all counters zero), passes wasted_ratio together with
// pointers to its used, commited and wasted_mem fields to zmalloc_get_allocator_wasted_blocks,
// and returns the struct by value.
// NOTE(review): the meaning of wasted_ratio is defined by the zmalloc helper, which is not
// visible here; the only visible caller passed FLAGS_mem_defrag_page_utilization_threshold.
- ShardMemUsage ReadShardMemUsage (float wasted_ratio) {
85
- ShardMemUsage usage;
86
- zmalloc_get_allocator_wasted_blocks (wasted_ratio, &usage.used , &usage.commited ,
87
- &usage.wasted_mem );
88
- return usage;
89
- }
90
-
91
73
bool HasContendedLocks (ShardId shard_id, Transaction* trx, const DbTable* table) {
92
74
auto is_contended = [table](LockFp fp) { return table->trans_locks .Find (fp)->IsContended (); };
93
75
@@ -249,26 +231,42 @@ bool EngineShard::DefragTaskState::CheckRequired() {
249
231
return false ;
250
232
}
251
233
252
- const std::size_t global_threshold = limit * GetFlag (FLAGS_mem_defrag_threshold);
234
+ static thread_local fragmentation_info finfo{.committed = 0 , .wasted = 0 , .bin = 0 };
235
+
236
+ const std::size_t global_threshold = double (limit) * GetFlag (FLAGS_mem_defrag_threshold);
253
237
if (global_threshold > rss_mem_current.load (memory_order_relaxed)) {
238
+ finfo.bin = 0 ; // reset.
254
239
return false ;
255
240
}
256
241
257
- const auto now = time (nullptr );
258
- const auto seconds_from_prev_check = now - last_check_time;
259
- const auto mem_defrag_interval = GetFlag (FLAGS_mem_defrag_check_sec_interval);
242
+ if (finfo.bin == 0 ) { // did not start the iterative checking yet
243
+ const auto now = time (nullptr );
244
+ const auto seconds_from_prev_check = now - last_check_time;
245
+ const auto mem_defrag_interval = GetFlag (FLAGS_mem_defrag_check_sec_interval);
260
246
261
- if (seconds_from_prev_check < mem_defrag_interval) {
262
- return false ;
247
+ if (seconds_from_prev_check < mem_defrag_interval) {
248
+ return false ;
249
+ }
250
+
251
+ // start checking.
252
+ finfo.committed = finfo.wasted = 0 ;
263
253
}
264
- last_check_time = now;
265
254
266
- ShardMemUsage usage = ReadShardMemUsage (GetFlag (FLAGS_mem_defrag_page_utilization_threshold));
255
+ uint64_t start = absl::GetCurrentTimeNanos ();
256
+ int res = zmalloc_get_allocator_fragmentation_step (
257
+ GetFlag (FLAGS_mem_defrag_page_utilization_threshold), &finfo);
258
+ uint64_t duration = absl::GetCurrentTimeNanos () - start;
259
+ VLOG_IF (1 , duration > 20'000 ) << " Reading memory usage took " << duration / 1'000
260
+ << " usec on bin " << finfo.bin ;
261
+ if (res == 0 ) {
262
+ // finished checking.
263
+ last_check_time = time (nullptr );
267
264
268
- const double waste_threshold = GetFlag (FLAGS_mem_defrag_waste_threshold);
269
- if (usage.wasted_mem > (uint64_t (usage.commited * waste_threshold))) {
270
- VLOG (1 ) << " memory issue found for memory " << usage;
271
- return true ;
265
+ const double waste_threshold = GetFlag (FLAGS_mem_defrag_waste_threshold);
266
+ if (finfo.wasted > size_t (finfo.committed * waste_threshold)) {
267
+ VLOG (1 ) << " memory fragmentation issue found: " << finfo.wasted << " " << finfo.committed ;
268
+ return true ;
269
+ }
272
270
}
273
271
274
272
return false ;
@@ -322,11 +320,11 @@ std::optional<CollectedPageStats> EngineShard::DoDefrag(CollectPageStats collect
322
320
defrag_state_.UpdateScanState (cur.token ());
323
321
324
322
if (reallocations > 0 ) {
325
- VLOG (1 ) << " shard " << slice.shard_id () << " : successfully defrag " << reallocations
323
+ VLOG (2 ) << " shard " << slice.shard_id () << " : successfully defrag " << reallocations
326
324
<< " times, did it in " << traverses_count << " cursor is at the "
327
325
<< (defrag_state_.cursor == kCursorDoneState ? " end" : " in progress" );
328
326
} else {
329
- VLOG (1 ) << " shard " << slice.shard_id () << " : run the defrag " << traverses_count
327
+ VLOG (2 ) << " shard " << slice.shard_id () << " : run the defrag " << traverses_count
330
328
<< " times out of maximum " << kMaxTraverses << " , with cursor at "
331
329
<< (defrag_state_.cursor == kCursorDoneState ? " end" : " in progress" )
332
330
<< " but no location for defrag were found" ;
@@ -361,7 +359,7 @@ uint32_t EngineShard::DefragTask() {
361
359
return util::ProactorBase::kOnIdleMaxLevel ;
362
360
}
363
361
}
364
- return kRunAtLowPriority ;
362
+ return 3 ; // priority.
365
363
}
366
364
367
365
EngineShard::EngineShard (util::ProactorBase* pb, mi_heap_t * heap)
@@ -713,7 +711,7 @@ void EngineShard::RetireExpiredAndEvict() {
713
711
stats_.total_heartbeat_expired_keys += stats.deleted ;
714
712
stats_.total_heartbeat_expired_bytes += stats.deleted_bytes ;
715
713
++stats_.total_heartbeat_expired_calls ;
716
- VLOG (1 ) << " Heartbeat expired " << stats.deleted << " keys with total bytes "
714
+ VLOG (2 ) << " Heartbeat expired " << stats.deleted << " keys with total bytes "
717
715
<< stats.deleted_bytes << " with total expire flow calls "
718
716
<< stats_.total_heartbeat_expired_calls ;
719
717
}
0 commit comments