Skip to content

Commit 813d4c6

Browse files
boryas
authored and kdave committed
btrfs: prevent pathological periodic reclaim loops
Periodic reclaim runs the risk of getting stuck in a state where it keeps reclaiming the same block group over and over. This can happen if

1. reclaiming that block_group fails
2. reclaiming that block_group fails to move any extents into existing block_groups and just allocates a fresh chunk and moves everything.

Currently, 1. is a very tight loop inside the reclaim worker. That is critical for edge triggered reclaim or else we risk forgetting about a reclaimable group. On the other hand, with level triggered reclaim we can break out of that loop and get it later.

With that fixed, 2. applies to both failures and "successes" with no progress. If we have done a periodic reclaim on a space_info and nothing has changed in that space_info, there is not much point to trying again, so don't, until enough space gets free, which we capture with a heuristic of needing to net free 1 chunk.

Reviewed-by: Josef Bacik <[email protected]>
Signed-off-by: Boris Burkov <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent e4ca393 commit 813d4c6

File tree

3 files changed

+71
-11
lines changed

3 files changed

+71
-11
lines changed

fs/btrfs/block-group.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1933,6 +1933,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
19331933
reclaimed = 0;
19341934
spin_lock(&space_info->lock);
19351935
space_info->reclaim_errors++;
1936+
if (READ_ONCE(space_info->periodic_reclaim))
1937+
space_info->periodic_reclaim_ready = false;
19361938
spin_unlock(&space_info->lock);
19371939
}
19381940
spin_lock(&space_info->lock);
@@ -1941,7 +1943,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
19411943
spin_unlock(&space_info->lock);
19421944

19431945
next:
1944-
if (ret) {
1946+
if (ret && !READ_ONCE(space_info->periodic_reclaim)) {
19451947
/* Refcount held by the reclaim_bgs list after splice. */
19461948
spin_lock(&fs_info->unused_bgs_lock);
19471949
/*
@@ -3686,6 +3688,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
36863688
space_info->bytes_reserved -= num_bytes;
36873689
space_info->bytes_used += num_bytes;
36883690
space_info->disk_used += num_bytes * factor;
3691+
if (READ_ONCE(space_info->periodic_reclaim))
3692+
btrfs_space_info_update_reclaimable(space_info, -num_bytes);
36893693
spin_unlock(&cache->lock);
36903694
spin_unlock(&space_info->lock);
36913695
} else {
@@ -3695,8 +3699,10 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
36953699
btrfs_space_info_update_bytes_pinned(info, space_info, num_bytes);
36963700
space_info->bytes_used -= num_bytes;
36973701
space_info->disk_used -= num_bytes * factor;
3698-
3699-
reclaim = should_reclaim_block_group(cache, num_bytes);
3702+
if (READ_ONCE(space_info->periodic_reclaim))
3703+
btrfs_space_info_update_reclaimable(space_info, num_bytes);
3704+
else
3705+
reclaim = should_reclaim_block_group(cache, num_bytes);
37003706

37013707
spin_unlock(&cache->lock);
37023708
spin_unlock(&space_info->lock);

fs/btrfs/space-info.c

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// SPDX-License-Identifier: GPL-2.0
22

3+
#include "linux/spinlock.h"
34
#include <linux/minmax.h>
45
#include "misc.h"
56
#include "ctree.h"
@@ -1915,7 +1916,9 @@ static u64 calc_pct_ratio(u64 x, u64 y)
19151916
*/
19161917
static u64 calc_unalloc_target(struct btrfs_fs_info *fs_info)
19171918
{
1918-
return BTRFS_UNALLOC_BLOCK_GROUP_TARGET * calc_effective_data_chunk_size(fs_info);
1919+
u64 chunk_sz = calc_effective_data_chunk_size(fs_info);
1920+
1921+
return BTRFS_UNALLOC_BLOCK_GROUP_TARGET * chunk_sz;
19191922
}
19201923

19211924
/*
@@ -1951,14 +1954,13 @@ static int calc_dynamic_reclaim_threshold(struct btrfs_space_info *space_info)
19511954
u64 unused = alloc - used;
19521955
u64 want = target > unalloc ? target - unalloc : 0;
19531956
u64 data_chunk_size = calc_effective_data_chunk_size(fs_info);
1954-
/* Cast to int is OK because want <= target */
1955-
int ratio = calc_pct_ratio(want, target);
19561957

1957-
/* If we have no unused space, don't bother, it won't work anyway */
1958+
/* If we have no unused space, don't bother, it won't work anyway. */
19581959
if (unused < data_chunk_size)
19591960
return 0;
19601961

1961-
return ratio;
1962+
/* Cast to int is OK because want <= target. */
1963+
return calc_pct_ratio(want, target);
19621964
}
19631965

19641966
int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info)
@@ -2000,16 +2002,54 @@ static int do_reclaim_sweep(struct btrfs_fs_info *fs_info,
20002002
return 0;
20012003
}
20022004

2005+
void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes)
2006+
{
2007+
u64 chunk_sz = calc_effective_data_chunk_size(space_info->fs_info);
2008+
2009+
lockdep_assert_held(&space_info->lock);
2010+
space_info->reclaimable_bytes += bytes;
2011+
2012+
if (space_info->reclaimable_bytes >= chunk_sz)
2013+
btrfs_set_periodic_reclaim_ready(space_info, true);
2014+
}
2015+
2016+
void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready)
2017+
{
2018+
lockdep_assert_held(&space_info->lock);
2019+
if (!READ_ONCE(space_info->periodic_reclaim))
2020+
return;
2021+
if (ready != space_info->periodic_reclaim_ready) {
2022+
space_info->periodic_reclaim_ready = ready;
2023+
if (!ready)
2024+
space_info->reclaimable_bytes = 0;
2025+
}
2026+
}
2027+
2028+
bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info)
2029+
{
2030+
bool ret;
2031+
2032+
if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
2033+
return false;
2034+
if (!READ_ONCE(space_info->periodic_reclaim))
2035+
return false;
2036+
2037+
spin_lock(&space_info->lock);
2038+
ret = space_info->periodic_reclaim_ready;
2039+
btrfs_set_periodic_reclaim_ready(space_info, false);
2040+
spin_unlock(&space_info->lock);
2041+
2042+
return ret;
2043+
}
2044+
20032045
int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info)
20042046
{
20052047
int ret;
20062048
int raid;
20072049
struct btrfs_space_info *space_info;
20082050

20092051
list_for_each_entry(space_info, &fs_info->space_info, list) {
2010-
if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
2011-
continue;
2012-
if (!READ_ONCE(space_info->periodic_reclaim))
2052+
if (!btrfs_should_periodic_reclaim(space_info))
20132053
continue;
20142054
for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) {
20152055
ret = do_reclaim_sweep(fs_info, space_info, raid);

fs/btrfs/space-info.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,17 @@ struct btrfs_space_info {
196196
* threshold in the cleaner thread.
197197
*/
198198
bool periodic_reclaim;
199+
200+
/*
201+
* Periodic reclaim should be a no-op if a space_info hasn't
202+
* freed any space since the last time we tried.
203+
*/
204+
bool periodic_reclaim_ready;
205+
206+
/*
207+
* Net bytes freed or allocated since the last reclaim pass.
208+
*/
209+
s64 reclaimable_bytes;
199210
};
200211

201212
struct reserve_ticket {
@@ -278,6 +289,9 @@ void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
278289
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
279290
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
280291

292+
void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes);
293+
void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready);
294+
bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info);
281295
int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info);
282296
int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info);
283297

0 commit comments

Comments
 (0)