
Commit 70704c3

Mikulas Patocka authored and snitm committed
dm bufio: do buffer cleanup from a workqueue
Until now, DM bufio's waiting for IO from reclaim context in its
shrinker has caused kswapd to block, which results in systemic IO
stalls and even deadlock, e.g.:
https://www.redhat.com/archives/dm-devel/2020-March/msg00025.html

Here is Dave Chinner's problem description that motivated this fix, from:
https://lore.kernel.org/linux-fsdevel/[email protected]/

"Waiting for IO in kswapd reclaim context is considered harmful - kswapd
context shrinker reclaim should be as non-blocking as possible, and any
back-off to wait for IO to complete should be done by the high level
reclaim core once it's completed an entire reclaim scan cycle of
everything....

What follows from that, and is pertinent in this situation, is that if
you don't block kswapd, then other reclaim contexts are not going to get
stuck waiting for it regardless of the reclaim context they use."

Continued elsewhere:

"The only way to fix this problem once and for all is to stop using the
shrinker as a mechanism to issue and wait on IO. If you need background
writeback of dirty buffers, do it from a WQ_MEM_RECLAIM workqueue that
isn't directly in the memory reclaim path and so can issue writeback and
block safely from a GFP_KERNEL context. Kick the workqueue from the
shrinker context, but get rid of the IO submission and waiting from the
shrinker and all the GFP_NOFS memory reclaim recursion problems go away."

As such, this commit moves buffer cleanup to a workqueue.

Suggested-by: Dave Chinner <[email protected]>
Reported-by: Tahsin Erdogan <[email protected]>
Signed-off-by: Mikulas Patocka <[email protected]>
Tested-by: Gabriel Krisman Bertazi <[email protected]>
Signed-off-by: Mike Snitzer <[email protected]>
1 parent e766668 commit 70704c3
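
The pattern Chinner recommends is generic: record how much reclaim wants freed, kick a workqueue, and do any blocking work there. Below is a minimal sketch of that hand-off using hypothetical names (my_cache, my_wq, my_shrink_scan, my_shrink_work), not the dm-bufio code itself (the real changes follow in the diff); it assumes my_wq was created elsewhere as a WQ_MEM_RECLAIM workqueue via alloc_workqueue():

#include <linux/shrinker.h>
#include <linux/workqueue.h>
#include <linux/atomic.h>

/* Hypothetical cache; mirrors the fields this commit adds to dm_bufio_client. */
struct my_cache {
        struct shrinker shrinker;
        struct work_struct shrink_work;
        atomic_long_t need_shrink;      /* objects reclaim has asked us to free */
};

static struct workqueue_struct *my_wq;  /* assumed: alloc_workqueue(..., WQ_MEM_RECLAIM, 0) */

/* Workqueue context: free to block on IO and allocate with GFP_KERNEL. */
static void my_shrink_work(struct work_struct *w)
{
        struct my_cache *c = container_of(w, struct my_cache, shrink_work);

        while (atomic_long_read(&c->need_shrink) > 0) {
                /* evict or write back one object here; may wait for IO */
                /* (a real implementation also stops when nothing is left) */
                atomic_long_dec(&c->need_shrink);
        }
}

/* Reclaim context: never block here; just record the request and kick the worker. */
static unsigned long my_shrink_scan(struct shrinker *s, struct shrink_control *sc)
{
        struct my_cache *c = container_of(s, struct my_cache, shrinker);

        atomic_long_add(sc->nr_to_scan, &c->need_shrink);
        queue_work(my_wq, &c->shrink_work);
        return sc->nr_to_scan;          /* report the objects as handed off */
}

The matching count callback should then discount whatever is already queued, as dm_bufio_shrink_count() does in the diff below, so reclaim does not double-count work in flight.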

File tree

1 file changed: +41 -19 lines changed


drivers/md/dm-bufio.c

Lines changed: 41 additions & 19 deletions
@@ -108,7 +108,10 @@ struct dm_bufio_client {
 	int async_write_error;
 
 	struct list_head client_list;
+
 	struct shrinker shrinker;
+	struct work_struct shrink_work;
+	atomic_long_t need_shrink;
 };
 
 /*
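
These two fields carry the hand-off state between reclaim and the worker: need_shrink accumulates how many buffers the shrinker has asked to free, and shrink_work is the deferred pass that actually frees them.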
@@ -1634,8 +1637,7 @@ static unsigned long get_retain_buffers(struct dm_bufio_client *c)
 	return retain_bytes;
 }
 
-static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
-			    gfp_t gfp_mask)
+static void __scan(struct dm_bufio_client *c)
 {
 	int l;
 	struct dm_buffer *b, *tmp;
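
__scan() drops its nr_to_scan and gfp_mask parameters: it now runs only from the worker, where blocking with GFP_KERNEL is safe, and takes its quota from c->need_shrink instead (see the next hunk).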
@@ -1646,42 +1648,58 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
 
 	for (l = 0; l < LIST_SIZE; l++) {
 		list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
-			if (__try_evict_buffer(b, gfp_mask))
+			if (count - freed <= retain_target)
+				atomic_long_set(&c->need_shrink, 0);
+			if (!atomic_long_read(&c->need_shrink))
+				return;
+			if (__try_evict_buffer(b, GFP_KERNEL)) {
+				atomic_long_dec(&c->need_shrink);
 				freed++;
-			if (!--nr_to_scan || ((count - freed) <= retain_target))
-				return freed;
+			}
 			cond_resched();
 		}
 	}
-	return freed;
 }
 
-static unsigned long
-dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+static void shrink_work(struct work_struct *w)
+{
+	struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work);
+
+	dm_bufio_lock(c);
+	__scan(c);
+	dm_bufio_unlock(c);
+}
+
+static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct dm_bufio_client *c;
-	unsigned long freed;
 
 	c = container_of(shrink, struct dm_bufio_client, shrinker);
-	if (sc->gfp_mask & __GFP_FS)
-		dm_bufio_lock(c);
-	else if (!dm_bufio_trylock(c))
-		return SHRINK_STOP;
+	atomic_long_add(sc->nr_to_scan, &c->need_shrink);
+	queue_work(dm_bufio_wq, &c->shrink_work);
 
-	freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
-	dm_bufio_unlock(c);
-	return freed;
+	return sc->nr_to_scan;
 }
 
-static unsigned long
-dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
 	unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
 			      READ_ONCE(c->n_buffers[LIST_DIRTY]);
 	unsigned long retain_target = get_retain_buffers(c);
+	unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink);
+
+	if (unlikely(count < retain_target))
+		count = 0;
+	else
+		count -= retain_target;
 
-	return (count < retain_target) ? 0 : (count - retain_target);
+	if (unlikely(count < queued_for_cleanup))
+		count = 0;
+	else
+		count -= queued_for_cleanup;
+
+	return count;
 }
 
 /*
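
Note the new accounting in dm_bufio_shrink_count(): it discounts both the retain target and the buffers already handed to the worker, so reclaim is not re-told about work still in flight. With purely illustrative numbers: 10000 clean+dirty buffers, a retain target of 256, and need_shrink at 2000 yields 10000 - 256 - 2000 = 7744 reported reclaimable objects.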
@@ -1772,6 +1790,9 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
 		__free_buffer_wake(b);
 	}
 
+	INIT_WORK(&c->shrink_work, shrink_work);
+	atomic_long_set(&c->need_shrink, 0);
+
 	c->shrinker.count_objects = dm_bufio_shrink_count;
 	c->shrinker.scan_objects = dm_bufio_shrink_scan;
 	c->shrinker.seeks = 1;
@@ -1817,6 +1838,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
 	drop_buffers(c);
 
 	unregister_shrinker(&c->shrinker);
+	flush_work(&c->shrink_work);
 
 	mutex_lock(&dm_bufio_clients_lock);
 