Skip to content

Commit 9769378

Browse files
ebiggersMikulas Patocka
authored andcommitted
dm-bufio: remove maximum age based eviction
Every 30 seconds, dm-bufio evicts all buffers that were not accessed within the last max_age_seconds, except those pinned in memory via retain_bytes. By default max_age_seconds is 300 (i.e. 5 minutes), and retain_bytes is 262144 (i.e. 256 KiB) per dm-bufio client. This eviction algorithm is much too eager and is also redundant with the shinker based eviction. Testing on an Android phone shows that about 30 MB of dm-bufio buffers (from dm-verity Merkle tree blocks) are loaded at boot time, and then about 90% of them are suddenly thrown away 5 minutes after boot. This results in unnecessary Merkle tree I/O later. Meanwhile, if the system actually encounters memory pressure, testing also shows that the shrinker is effective at evicting the buffers. Other major Linux kernel caches, such as the page cache, do not enforce a maximum age, instead relying on the shrinker. For these reasons, Android is now setting max_age_seconds to 86400 (i.e. 1 day), which mostly disables it; see https://android.googlesource.com/platform/system/core/+/cadad290a79d5b0a30add935aaadab7c1b1ef5e9%5E%21/ That is a much better default, but really the maximum age based eviction should not exist at all. Let's remove it. Note that this also eliminates the need to run work every 30 seconds, which is beneficial too. Signed-off-by: Eric Biggers <[email protected]> Signed-off-by: Mikulas Patocka <[email protected]>
1 parent f9ed312 commit 9769378

File tree

1 file changed

+36
-153
lines changed

1 file changed

+36
-153
lines changed

drivers/md/dm-bufio.c

Lines changed: 36 additions & 153 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,6 @@
4040
#define DM_BUFIO_WRITEBACK_RATIO 3
4141
#define DM_BUFIO_LOW_WATERMARK_RATIO 16
4242

43-
/*
44-
* Check buffer ages in this interval (seconds)
45-
*/
46-
#define DM_BUFIO_WORK_TIMER_SECS 30
47-
48-
/*
49-
* Free buffers when they are older than this (seconds)
50-
*/
51-
#define DM_BUFIO_DEFAULT_AGE_SECS 300
52-
5343
/*
5444
* The nr of bytes of cached data to keep around.
5545
*/
@@ -1055,10 +1045,8 @@ static unsigned long dm_bufio_cache_size_latch;
10551045

10561046
static DEFINE_SPINLOCK(global_spinlock);
10571047

1058-
/*
1059-
* Buffers are freed after this timeout
1060-
*/
1061-
static unsigned int dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
1048+
static unsigned int dm_bufio_max_age; /* No longer does anything */
1049+
10621050
static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
10631051

10641052
static unsigned long dm_bufio_peak_allocated;
@@ -1086,7 +1074,6 @@ static LIST_HEAD(dm_bufio_all_clients);
10861074
static DEFINE_MUTEX(dm_bufio_clients_lock);
10871075

10881076
static struct workqueue_struct *dm_bufio_wq;
1089-
static struct delayed_work dm_bufio_cleanup_old_work;
10901077
static struct work_struct dm_bufio_replacement_work;
10911078

10921079

@@ -2673,130 +2660,6 @@ EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);
26732660

26742661
/*--------------------------------------------------------------*/
26752662

2676-
static unsigned int get_max_age_hz(void)
2677-
{
2678-
unsigned int max_age = READ_ONCE(dm_bufio_max_age);
2679-
2680-
if (max_age > UINT_MAX / HZ)
2681-
max_age = UINT_MAX / HZ;
2682-
2683-
return max_age * HZ;
2684-
}
2685-
2686-
static bool older_than(struct dm_buffer *b, unsigned long age_hz)
2687-
{
2688-
return time_after_eq(jiffies, READ_ONCE(b->last_accessed) + age_hz);
2689-
}
2690-
2691-
struct evict_params {
2692-
gfp_t gfp;
2693-
unsigned long age_hz;
2694-
2695-
/*
2696-
* This gets updated with the largest last_accessed (ie. most
2697-
* recently used) of the evicted buffers. It will not be reinitialised
2698-
* by __evict_many(), so you can use it across multiple invocations.
2699-
*/
2700-
unsigned long last_accessed;
2701-
};
2702-
2703-
/*
2704-
* We may not be able to evict this buffer if IO pending or the client
2705-
* is still using it.
2706-
*
2707-
* And if GFP_NOFS is used, we must not do any I/O because we hold
2708-
* dm_bufio_clients_lock and we would risk deadlock if the I/O gets
2709-
* rerouted to different bufio client.
2710-
*/
2711-
static enum evict_result select_for_evict(struct dm_buffer *b, void *context)
2712-
{
2713-
struct evict_params *params = context;
2714-
2715-
if (!(params->gfp & __GFP_FS) ||
2716-
(static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep)) {
2717-
if (test_bit_acquire(B_READING, &b->state) ||
2718-
test_bit(B_WRITING, &b->state) ||
2719-
test_bit(B_DIRTY, &b->state))
2720-
return ER_DONT_EVICT;
2721-
}
2722-
2723-
return older_than(b, params->age_hz) ? ER_EVICT : ER_STOP;
2724-
}
2725-
2726-
static unsigned long __evict_many(struct dm_bufio_client *c,
2727-
struct evict_params *params,
2728-
int list_mode, unsigned long max_count)
2729-
{
2730-
unsigned long count;
2731-
unsigned long last_accessed;
2732-
struct dm_buffer *b;
2733-
2734-
for (count = 0; count < max_count; count++) {
2735-
b = cache_evict(&c->cache, list_mode, select_for_evict, params);
2736-
if (!b)
2737-
break;
2738-
2739-
last_accessed = READ_ONCE(b->last_accessed);
2740-
if (time_after_eq(params->last_accessed, last_accessed))
2741-
params->last_accessed = last_accessed;
2742-
2743-
__make_buffer_clean(b);
2744-
__free_buffer_wake(b);
2745-
2746-
cond_resched();
2747-
}
2748-
2749-
return count;
2750-
}
2751-
2752-
static void evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
2753-
{
2754-
struct evict_params params = {.gfp = 0, .age_hz = age_hz, .last_accessed = 0};
2755-
unsigned long retain = get_retain_buffers(c);
2756-
unsigned long count;
2757-
LIST_HEAD(write_list);
2758-
2759-
dm_bufio_lock(c);
2760-
2761-
__check_watermark(c, &write_list);
2762-
if (unlikely(!list_empty(&write_list))) {
2763-
dm_bufio_unlock(c);
2764-
__flush_write_list(&write_list);
2765-
dm_bufio_lock(c);
2766-
}
2767-
2768-
count = cache_total(&c->cache);
2769-
if (count > retain)
2770-
__evict_many(c, &params, LIST_CLEAN, count - retain);
2771-
2772-
dm_bufio_unlock(c);
2773-
}
2774-
2775-
static void cleanup_old_buffers(void)
2776-
{
2777-
unsigned long max_age_hz = get_max_age_hz();
2778-
struct dm_bufio_client *c;
2779-
2780-
mutex_lock(&dm_bufio_clients_lock);
2781-
2782-
__cache_size_refresh();
2783-
2784-
list_for_each_entry(c, &dm_bufio_all_clients, client_list)
2785-
evict_old_buffers(c, max_age_hz);
2786-
2787-
mutex_unlock(&dm_bufio_clients_lock);
2788-
}
2789-
2790-
static void work_fn(struct work_struct *w)
2791-
{
2792-
cleanup_old_buffers();
2793-
2794-
queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
2795-
DM_BUFIO_WORK_TIMER_SECS * HZ);
2796-
}
2797-
2798-
/*--------------------------------------------------------------*/
2799-
28002663
/*
28012664
* Global cleanup tries to evict the oldest buffers from across _all_
28022665
* the clients. It does this by repeatedly evicting a few buffers from
@@ -2834,27 +2697,51 @@ static void __insert_client(struct dm_bufio_client *new_client)
28342697
list_add_tail(&new_client->client_list, h);
28352698
}
28362699

2700+
static enum evict_result select_for_evict(struct dm_buffer *b, void *context)
2701+
{
2702+
/* In no-sleep mode, we cannot wait on IO. */
2703+
if (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep) {
2704+
if (test_bit_acquire(B_READING, &b->state) ||
2705+
test_bit(B_WRITING, &b->state) ||
2706+
test_bit(B_DIRTY, &b->state))
2707+
return ER_DONT_EVICT;
2708+
}
2709+
return ER_EVICT;
2710+
}
2711+
28372712
static unsigned long __evict_a_few(unsigned long nr_buffers)
28382713
{
2839-
unsigned long count;
28402714
struct dm_bufio_client *c;
2841-
struct evict_params params = {
2842-
.gfp = GFP_KERNEL,
2843-
.age_hz = 0,
2844-
/* set to jiffies in case there are no buffers in this client */
2845-
.last_accessed = jiffies
2846-
};
2715+
unsigned long oldest_buffer = jiffies;
2716+
unsigned long last_accessed;
2717+
unsigned long count;
2718+
struct dm_buffer *b;
28472719

28482720
c = __pop_client();
28492721
if (!c)
28502722
return 0;
28512723

28522724
dm_bufio_lock(c);
2853-
count = __evict_many(c, &params, LIST_CLEAN, nr_buffers);
2725+
2726+
for (count = 0; count < nr_buffers; count++) {
2727+
b = cache_evict(&c->cache, LIST_CLEAN, select_for_evict, NULL);
2728+
if (!b)
2729+
break;
2730+
2731+
last_accessed = READ_ONCE(b->last_accessed);
2732+
if (time_after_eq(oldest_buffer, last_accessed))
2733+
oldest_buffer = last_accessed;
2734+
2735+
__make_buffer_clean(b);
2736+
__free_buffer_wake(b);
2737+
2738+
cond_resched();
2739+
}
2740+
28542741
dm_bufio_unlock(c);
28552742

28562743
if (count)
2857-
c->oldest_buffer = params.last_accessed;
2744+
c->oldest_buffer = oldest_buffer;
28582745
__insert_client(c);
28592746

28602747
return count;
@@ -2937,10 +2824,7 @@ static int __init dm_bufio_init(void)
29372824
if (!dm_bufio_wq)
29382825
return -ENOMEM;
29392826

2940-
INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn);
29412827
INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup);
2942-
queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
2943-
DM_BUFIO_WORK_TIMER_SECS * HZ);
29442828

29452829
return 0;
29462830
}
@@ -2952,7 +2836,6 @@ static void __exit dm_bufio_exit(void)
29522836
{
29532837
int bug = 0;
29542838

2955-
cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
29562839
destroy_workqueue(dm_bufio_wq);
29572840

29582841
if (dm_bufio_client_count) {
@@ -2989,7 +2872,7 @@ module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, 0644);
29892872
MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
29902873

29912874
module_param_named(max_age_seconds, dm_bufio_max_age, uint, 0644);
2992-
MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
2875+
MODULE_PARM_DESC(max_age_seconds, "No longer does anything");
29932876

29942877
module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, 0644);
29952878
MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");

0 commit comments

Comments
 (0)