Skip to content

Commit a1b547f

Browse files
committed
Merge tag 'for-6.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:

"The highlights are new logic behind background block group reclaim, automatic removal of qgroup after removing a subvolume and new 'rescue=' mount options.

The rest is optimizations, cleanups and refactoring.

User visible features:

- dynamic block group reclaim:
  - tunable framework to avoid situations where eager data allocations prevent creating new metadata chunks due to lack of unallocated space
  - reuse sysfs knob bg_reclaim_threshold (otherwise used only in zoned mode) for a fixed value threshold
  - new on/off sysfs knob "dynamic_reclaim" calculating the value based on heuristics, aiming to keep spare working space for relocating chunks but not to needlessly relocate partially utilized block groups or reclaim newly allocated ones
  - stats are exported in sysfs per block group type, files "reclaim_*"
  - this may increase IO load at unexpected times but the corner case of no allocatable block groups is known to be worse

- automatically remove qgroup of deleted subvolumes:
  - adjust qgroup removal conditions, make sure all related subvolume data are already removed, or return EBUSY, also take into account setting of sysfs drop_subtree_threshold
  - also works in squota mode

- mount option updates: new modes of 'rescue=' that allow to mount images (read-only) that could have been partially converted by user space tools
  - ignoremetacsums - invalid metadata checksums are ignored
  - ignoresuperflags - super block flags that track conversion in progress (like UUID or checksums)

Core:

- size of struct btrfs_inode is now below 1024 (on a release config), improved memory packing and other secondary effects
- switch tracking of open inodes from rb-tree to xarray, minor performance improvement
- reduce number of empty transaction commits when there are no dirty data/metadata
- memory allocation optimizations (reduced numbers, reordering out of critical sections)
- extent map structure optimizations and refactoring, more sanity checks
- more subpage in zoned mode preparations or fixes
- general snapshot code cleanups, improvements and documentation
- tree-checker updates: more file extent ram_bytes fixes, continued
- raid-stripe-tree update (not backward compatible):
  - remove extent encoding field from the structure, can be inferred from other information
  - requires btrfs-progs 6.9.1 or newer
- cleanups and refactoring
  - error message updates
  - error handling improvements
  - return type and parameter cleanups and improvements"

* tag 'for-6.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (152 commits)
  btrfs: fix extent map use-after-free when adding pages to compressed bio
  btrfs: fix bitmap leak when loading free space cache on duplicate entry
  btrfs: remove the BUG_ON() inside extent_range_clear_dirty_for_io()
  btrfs: move extent_range_clear_dirty_for_io() into inode.c
  btrfs: enhance compression error messages
  btrfs: fix data race when accessing the last_trans field of a root
  btrfs: rename the extra_gfp parameter of btrfs_alloc_page_array()
  btrfs: remove the extra_gfp parameter from btrfs_alloc_folio_array()
  btrfs: introduce new "rescue=ignoresuperflags" mount option
  btrfs: introduce new "rescue=ignoremetacsums" mount option
  btrfs: output the unrecognized super block flags as hex
  btrfs: remove unused Opt enums
  btrfs: tree-checker: add extra ram_bytes and disk_num_bytes check
  btrfs: fix the ram_bytes assignment for truncated ordered extents
  btrfs: make validate_extent_map() catch ram_bytes mismatch
  btrfs: ignore incorrect btrfs_file_extent_item::ram_bytes
  btrfs: cleanup the bytenr usage inside btrfs_extent_item_to_extent_map()
  btrfs: fix typo in error message in btrfs_validate_super()
  btrfs: move the direct IO code into its own file
  btrfs: pass a btrfs_inode to btrfs_set_prop()
  ...
2 parents 6706415 + 8e78605 commit a1b547f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+5175
-3905
lines changed

fs/btrfs/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
3333
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
3434
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
3535
subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o \
36-
lru_cache.o raid-stripe-tree.o
36+
lru_cache.o raid-stripe-tree.o fiemap.o direct-io.o
3737

3838
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
3939
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o

fs/btrfs/accessors.h

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *e
3434

3535
static inline u8 get_unaligned_le8(const void *p)
3636
{
37-
return *(u8 *)p;
37+
return *(const u8 *)p;
3838
}
3939

4040
static inline void put_unaligned_le8(u8 val, void *p)
@@ -48,8 +48,8 @@ static inline void put_unaligned_le8(u8 val, void *p)
4848
offsetof(type, member), \
4949
sizeof_field(type, member)))
5050

51-
#define write_eb_member(eb, ptr, type, member, result) (\
52-
write_extent_buffer(eb, (char *)(result), \
51+
#define write_eb_member(eb, ptr, type, member, source) ( \
52+
write_extent_buffer(eb, (const char *)(source), \
5353
((unsigned long)(ptr)) + \
5454
offsetof(type, member), \
5555
sizeof_field(type, member)))
@@ -315,11 +315,8 @@ BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
315315
BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
316316
BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
317317

318-
BTRFS_SETGET_FUNCS(stripe_extent_encoding, struct btrfs_stripe_extent, encoding, 8);
319318
BTRFS_SETGET_FUNCS(raid_stride_devid, struct btrfs_raid_stride, devid, 64);
320319
BTRFS_SETGET_FUNCS(raid_stride_physical, struct btrfs_raid_stride, physical, 64);
321-
BTRFS_SETGET_STACK_FUNCS(stack_stripe_extent_encoding,
322-
struct btrfs_stripe_extent, encoding, 8);
323320
BTRFS_SETGET_STACK_FUNCS(stack_raid_stride_devid, struct btrfs_raid_stride, devid, 64);
324321
BTRFS_SETGET_STACK_FUNCS(stack_raid_stride_physical, struct btrfs_raid_stride, physical, 64);
325322

@@ -353,7 +350,7 @@ static inline void btrfs_tree_block_key(const struct extent_buffer *eb,
353350

354351
static inline void btrfs_set_tree_block_key(const struct extent_buffer *eb,
355352
struct btrfs_tree_block_info *item,
356-
struct btrfs_disk_key *key)
353+
const struct btrfs_disk_key *key)
357354
{
358355
write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
359356
}
@@ -446,7 +443,7 @@ void btrfs_node_key(const struct extent_buffer *eb,
446443
struct btrfs_disk_key *disk_key, int nr);
447444

448445
static inline void btrfs_set_node_key(const struct extent_buffer *eb,
449-
struct btrfs_disk_key *disk_key, int nr)
446+
const struct btrfs_disk_key *disk_key, int nr)
450447
{
451448
unsigned long ptr;
452449

@@ -512,7 +509,7 @@ static inline void btrfs_item_key(const struct extent_buffer *eb,
512509
}
513510

514511
static inline void btrfs_set_item_key(struct extent_buffer *eb,
515-
struct btrfs_disk_key *disk_key, int nr)
512+
const struct btrfs_disk_key *disk_key, int nr)
516513
{
517514
struct btrfs_item *item = btrfs_item_nr(eb, nr);
518515

fs/btrfs/bio.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ struct btrfs_failed_bio {
2929
/* Is this a data path I/O that needs storage layer checksum and repair? */
3030
static inline bool is_data_bbio(struct btrfs_bio *bbio)
3131
{
32-
return bbio->inode && is_data_inode(&bbio->inode->vfs_inode);
32+
return bbio->inode && is_data_inode(bbio->inode);
3333
}
3434

3535
static bool bbio_has_ordered_extent(struct btrfs_bio *bbio)
@@ -732,7 +732,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
732732
* point, so they are handled as part of the no-checksum case.
733733
*/
734734
if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) &&
735-
!test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
735+
!test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
736736
!btrfs_is_data_reloc_root(inode->root)) {
737737
if (should_async_write(bbio) &&
738738
btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))

fs/btrfs/block-group.c

Lines changed: 40 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1022,6 +1022,13 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
10221022
}
10231023
}
10241024

1025+
static struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
1026+
{
1027+
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
1028+
return fs_info->block_group_root;
1029+
return btrfs_extent_root(fs_info, 0);
1030+
}
1031+
10251032
static int remove_block_group_item(struct btrfs_trans_handle *trans,
10261033
struct btrfs_path *path,
10271034
struct btrfs_block_group *block_group)
@@ -1757,24 +1764,21 @@ static inline bool btrfs_should_reclaim(struct btrfs_fs_info *fs_info)
17571764

17581765
static bool should_reclaim_block_group(struct btrfs_block_group *bg, u64 bytes_freed)
17591766
{
1760-
const struct btrfs_space_info *space_info = bg->space_info;
1761-
const int reclaim_thresh = READ_ONCE(space_info->bg_reclaim_threshold);
1767+
const int thresh_pct = btrfs_calc_reclaim_threshold(bg->space_info);
1768+
u64 thresh_bytes = mult_perc(bg->length, thresh_pct);
17621769
const u64 new_val = bg->used;
17631770
const u64 old_val = new_val + bytes_freed;
1764-
u64 thresh;
17651771

1766-
if (reclaim_thresh == 0)
1772+
if (thresh_bytes == 0)
17671773
return false;
17681774

1769-
thresh = mult_perc(bg->length, reclaim_thresh);
1770-
17711775
/*
17721776
* If we were below the threshold before don't reclaim, we are likely a
17731777
* brand new block group and we don't want to relocate new block groups.
17741778
*/
1775-
if (old_val < thresh)
1779+
if (old_val < thresh_bytes)
17761780
return false;
1777-
if (new_val >= thresh)
1781+
if (new_val >= thresh_bytes)
17781782
return false;
17791783
return true;
17801784
}
@@ -1822,6 +1826,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
18221826
list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp);
18231827
while (!list_empty(&fs_info->reclaim_bgs)) {
18241828
u64 zone_unusable;
1829+
u64 reclaimed;
18251830
int ret = 0;
18261831

18271832
bg = list_first_entry(&fs_info->reclaim_bgs,
@@ -1835,6 +1840,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
18351840
/* Don't race with allocators so take the groups_sem */
18361841
down_write(&space_info->groups_sem);
18371842

1843+
spin_lock(&space_info->lock);
18381844
spin_lock(&bg->lock);
18391845
if (bg->reserved || bg->pinned || bg->ro) {
18401846
/*
@@ -1844,6 +1850,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
18441850
* this block group.
18451851
*/
18461852
spin_unlock(&bg->lock);
1853+
spin_unlock(&space_info->lock);
18471854
up_write(&space_info->groups_sem);
18481855
goto next;
18491856
}
@@ -1862,6 +1869,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
18621869
if (!btrfs_test_opt(fs_info, DISCARD_ASYNC))
18631870
btrfs_mark_bg_unused(bg);
18641871
spin_unlock(&bg->lock);
1872+
spin_unlock(&space_info->lock);
18651873
up_write(&space_info->groups_sem);
18661874
goto next;
18671875

@@ -1878,10 +1886,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
18781886
*/
18791887
if (!should_reclaim_block_group(bg, bg->length)) {
18801888
spin_unlock(&bg->lock);
1889+
spin_unlock(&space_info->lock);
18811890
up_write(&space_info->groups_sem);
18821891
goto next;
18831892
}
18841893
spin_unlock(&bg->lock);
1894+
spin_unlock(&space_info->lock);
18851895

18861896
/*
18871897
* Get out fast, in case we're read-only or unmounting the
@@ -1914,15 +1924,26 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
19141924
div64_u64(bg->used * 100, bg->length),
19151925
div64_u64(zone_unusable * 100, bg->length));
19161926
trace_btrfs_reclaim_block_group(bg);
1927+
reclaimed = bg->used;
19171928
ret = btrfs_relocate_chunk(fs_info, bg->start);
19181929
if (ret) {
19191930
btrfs_dec_block_group_ro(bg);
19201931
btrfs_err(fs_info, "error relocating chunk %llu",
19211932
bg->start);
1933+
reclaimed = 0;
1934+
spin_lock(&space_info->lock);
1935+
space_info->reclaim_errors++;
1936+
if (READ_ONCE(space_info->periodic_reclaim))
1937+
space_info->periodic_reclaim_ready = false;
1938+
spin_unlock(&space_info->lock);
19221939
}
1940+
spin_lock(&space_info->lock);
1941+
space_info->reclaim_count++;
1942+
space_info->reclaim_bytes += reclaimed;
1943+
spin_unlock(&space_info->lock);
19231944

19241945
next:
1925-
if (ret) {
1946+
if (ret && !READ_ONCE(space_info->periodic_reclaim)) {
19261947
/* Refcount held by the reclaim_bgs list after splice. */
19271948
spin_lock(&fs_info->unused_bgs_lock);
19281949
/*
@@ -1964,6 +1985,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
19641985

19651986
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
19661987
{
1988+
btrfs_reclaim_sweep(fs_info);
19671989
spin_lock(&fs_info->unused_bgs_lock);
19681990
if (!list_empty(&fs_info->reclaim_bgs))
19691991
queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
@@ -3662,9 +3684,12 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
36623684
old_val += num_bytes;
36633685
cache->used = old_val;
36643686
cache->reserved -= num_bytes;
3687+
cache->reclaim_mark = 0;
36653688
space_info->bytes_reserved -= num_bytes;
36663689
space_info->bytes_used += num_bytes;
36673690
space_info->disk_used += num_bytes * factor;
3691+
if (READ_ONCE(space_info->periodic_reclaim))
3692+
btrfs_space_info_update_reclaimable(space_info, -num_bytes);
36683693
spin_unlock(&cache->lock);
36693694
spin_unlock(&space_info->lock);
36703695
} else {
@@ -3674,8 +3699,10 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
36743699
btrfs_space_info_update_bytes_pinned(info, space_info, num_bytes);
36753700
space_info->bytes_used -= num_bytes;
36763701
space_info->disk_used -= num_bytes * factor;
3677-
3678-
reclaim = should_reclaim_block_group(cache, num_bytes);
3702+
if (READ_ONCE(space_info->periodic_reclaim))
3703+
btrfs_space_info_update_reclaimable(space_info, num_bytes);
3704+
else
3705+
reclaim = should_reclaim_block_group(cache, num_bytes);
36793706

36803707
spin_unlock(&cache->lock);
36813708
spin_unlock(&space_info->lock);
@@ -4329,13 +4356,13 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
43294356
spin_lock(&block_group->lock);
43304357
if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF,
43314358
&block_group->runtime_flags)) {
4332-
struct inode *inode = block_group->inode;
4359+
struct btrfs_inode *inode = block_group->inode;
43334360

43344361
block_group->inode = NULL;
43354362
spin_unlock(&block_group->lock);
43364363

43374364
ASSERT(block_group->io_ctl.inode == NULL);
4338-
iput(inode);
4365+
iput(&inode->vfs_inode);
43394366
} else {
43404367
spin_unlock(&block_group->lock);
43414368
}

fs/btrfs/block-group.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ struct btrfs_caching_control {
115115

116116
struct btrfs_block_group {
117117
struct btrfs_fs_info *fs_info;
118-
struct inode *inode;
118+
struct btrfs_inode *inode;
119119
spinlock_t lock;
120120
u64 start;
121121
u64 length;
@@ -263,6 +263,7 @@ struct btrfs_block_group {
263263
struct work_struct zone_finish_work;
264264
struct extent_buffer *last_eb;
265265
enum btrfs_block_group_size_class size_class;
266+
u64 reclaim_mark;
266267
};
267268

268269
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)

0 commit comments

Comments
 (0)