Skip to content

Commit e4ca393

Browse files
boryas authored and kdave committed
btrfs: periodic block_group reclaim
We currently employ an edge-triggered block group reclaim strategy which marks block groups for reclaim as they free down past a threshold. With a dynamic threshold, this is worse than doing it in a level-triggered fashion periodically. That is because the reclaim itself happens periodically, so the threshold at that point in time is what really matters, not the threshold at freeing time. If we mark the reclaim in a big pass, then sort by usage and do reclaim, we also benefit from a negative feedback loop preventing unnecessary reclaims as we crunch through the "best" candidates. Since this is quite a different model, it requires some additional support. The edge-triggered reclaim has a good heuristic for not reclaiming fresh block groups, so we need to replace that with a typical GC sweep mark which skips block groups that have seen an allocation since the last sweep. Reviewed-by: Josef Bacik <[email protected]> Signed-off-by: Boris Burkov <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent f5ff64c commit e4ca393

File tree

5 files changed

+95
-0
lines changed

5 files changed

+95
-0
lines changed

fs/btrfs/block-group.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1983,6 +1983,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
19831983

19841984
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
19851985
{
1986+
btrfs_reclaim_sweep(fs_info);
19861987
spin_lock(&fs_info->unused_bgs_lock);
19871988
if (!list_empty(&fs_info->reclaim_bgs))
19881989
queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
@@ -3681,6 +3682,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
36813682
old_val += num_bytes;
36823683
cache->used = old_val;
36833684
cache->reserved -= num_bytes;
3685+
cache->reclaim_mark = 0;
36843686
space_info->bytes_reserved -= num_bytes;
36853687
space_info->bytes_used += num_bytes;
36863688
space_info->disk_used += num_bytes * factor;

fs/btrfs/block-group.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ struct btrfs_block_group {
263263
struct work_struct zone_finish_work;
264264
struct extent_buffer *last_eb;
265265
enum btrfs_block_group_size_class size_class;
266+
u64 reclaim_mark;
266267
};
267268

268269
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)

fs/btrfs/space-info.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,3 +1969,54 @@ int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info)
19691969
return calc_dynamic_reclaim_threshold(space_info);
19701970
return READ_ONCE(space_info->bg_reclaim_threshold);
19711971
}
1972+
1973+
static int do_reclaim_sweep(struct btrfs_fs_info *fs_info,
1974+
struct btrfs_space_info *space_info, int raid)
1975+
{
1976+
struct btrfs_block_group *bg;
1977+
int thresh_pct;
1978+
1979+
spin_lock(&space_info->lock);
1980+
thresh_pct = btrfs_calc_reclaim_threshold(space_info);
1981+
spin_unlock(&space_info->lock);
1982+
1983+
down_read(&space_info->groups_sem);
1984+
list_for_each_entry(bg, &space_info->block_groups[raid], list) {
1985+
u64 thresh;
1986+
bool reclaim = false;
1987+
1988+
btrfs_get_block_group(bg);
1989+
spin_lock(&bg->lock);
1990+
thresh = mult_perc(bg->length, thresh_pct);
1991+
if (bg->used < thresh && bg->reclaim_mark)
1992+
reclaim = true;
1993+
bg->reclaim_mark++;
1994+
spin_unlock(&bg->lock);
1995+
if (reclaim)
1996+
btrfs_mark_bg_to_reclaim(bg);
1997+
btrfs_put_block_group(bg);
1998+
}
1999+
up_read(&space_info->groups_sem);
2000+
return 0;
2001+
}
2002+
2003+
int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info)
2004+
{
2005+
int ret;
2006+
int raid;
2007+
struct btrfs_space_info *space_info;
2008+
2009+
list_for_each_entry(space_info, &fs_info->space_info, list) {
2010+
if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
2011+
continue;
2012+
if (!READ_ONCE(space_info->periodic_reclaim))
2013+
continue;
2014+
for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) {
2015+
ret = do_reclaim_sweep(fs_info, space_info, raid);
2016+
if (ret)
2017+
return ret;
2018+
}
2019+
}
2020+
2021+
return ret;
2022+
}

fs/btrfs/space-info.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,12 @@ struct btrfs_space_info {
190190
* fixed bg_reclaim_threshold.
191191
*/
192192
bool dynamic_reclaim;
193+
194+
/*
195+
* Periodically check all block groups against the reclaim
196+
* threshold in the cleaner thread.
197+
*/
198+
bool periodic_reclaim;
193199
};
194200

195201
struct reserve_ticket {
@@ -273,5 +279,6 @@ void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
273279
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
274280

275281
int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info);
282+
int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info);
276283

277284
#endif /* BTRFS_SPACE_INFO_H */

fs/btrfs/sysfs.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -973,6 +973,39 @@ BTRFS_ATTR_RW(space_info, dynamic_reclaim,
973973
btrfs_sinfo_dynamic_reclaim_show,
974974
btrfs_sinfo_dynamic_reclaim_store);
975975

976+
static ssize_t btrfs_sinfo_periodic_reclaim_show(struct kobject *kobj,
977+
struct kobj_attribute *a,
978+
char *buf)
979+
{
980+
struct btrfs_space_info *space_info = to_space_info(kobj);
981+
982+
return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->periodic_reclaim));
983+
}
984+
985+
/*
 * sysfs store handler for the space_info periodic_reclaim attribute.
 *
 * Parses @buf as a base-10 integer. A parse failure returns kstrtoint()'s
 * error; a negative value returns -EINVAL. Otherwise any non-zero value turns
 * periodic_reclaim on and zero turns it off, and @len is returned.
 */
static ssize_t btrfs_sinfo_periodic_reclaim_store(struct kobject *kobj,
						  struct kobj_attribute *a,
						  const char *buf, size_t len)
{
	struct btrfs_space_info *sinfo = to_space_info(kobj);
	int val;
	int err;

	err = kstrtoint(buf, 10, &val);
	/* val is only inspected once parsing is known to have succeeded. */
	if (!err && val < 0)
		err = -EINVAL;
	if (err)
		return err;

	WRITE_ONCE(sinfo->periodic_reclaim, val != 0);

	return len;
}
1004+
1005+
/*
 * Pair the periodic_reclaim show and store handlers into a single space_info
 * attribute. BTRFS_ATTR_RW is defined outside this view; presumably it
 * declares a read-write sysfs attribute -- confirm against its definition.
 */
BTRFS_ATTR_RW(space_info, periodic_reclaim,
1006+
btrfs_sinfo_periodic_reclaim_show,
1007+
btrfs_sinfo_periodic_reclaim_store);
1008+
9761009
/*
9771010
* Allocation information about block group types.
9781011
*
@@ -996,6 +1029,7 @@ static struct attribute *space_info_attrs[] = {
9961029
BTRFS_ATTR_PTR(space_info, reclaim_count),
9971030
BTRFS_ATTR_PTR(space_info, reclaim_bytes),
9981031
BTRFS_ATTR_PTR(space_info, reclaim_errors),
1032+
BTRFS_ATTR_PTR(space_info, periodic_reclaim),
9991033
#ifdef CONFIG_BTRFS_DEBUG
10001034
BTRFS_ATTR_PTR(space_info, force_chunk_alloc),
10011035
#endif

0 commit comments

Comments
 (0)