Skip to content

Commit 8da8d88

Browse files
committed
Merge tag 'for-6.8-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

- Fix a deadlock in fiemap. There was a big lock around the whole operation that can interfere with a page fault and mkwrite. Reducing the lock scope can also speed up fiemap.

- Fix the range condition for extent defragmentation, which could lead to a worse layout in some cases.

* tag 'for-6.8-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix deadlock with fiemap and extent locking
  btrfs: defrag: avoid unnecessary defrag caused by incorrect extent size
2 parents d8be5a5 + b0ad381 commit 8da8d88

File tree

2 files changed

+46
-18
lines changed

2 files changed

+46
-18
lines changed

fs/btrfs/defrag.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1046,7 +1046,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
10461046
goto add;
10471047

10481048
/* Skip too large extent */
1049-
if (range_len >= extent_thresh)
1049+
if (em->len >= extent_thresh)
10501050
goto next;
10511051

10521052
/*

fs/btrfs/extent_io.c

Lines changed: 45 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -2689,16 +2689,34 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
26892689
* it beyond i_size.
26902690
*/
26912691
while (cur_offset < end && cur_offset < i_size) {
2692+
struct extent_state *cached_state = NULL;
26922693
u64 delalloc_start;
26932694
u64 delalloc_end;
26942695
u64 prealloc_start;
2696+
u64 lockstart;
2697+
u64 lockend;
26952698
u64 prealloc_len = 0;
26962699
bool delalloc;
26972700

2701+
lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
2702+
lockend = round_up(end, inode->root->fs_info->sectorsize);
2703+
2704+
/*
2705+
* We are only locking for the delalloc range because that's the
2706+
* only thing that can change here. With fiemap we have a lock
2707+
* on the inode, so no buffered or direct writes can happen.
2708+
*
2709+
* However mmaps and normal page writeback will cause this to
2710+
* change arbitrarily. We have to lock the extent lock here to
2711+
* make sure that nobody messes with the tree while we're doing
2712+
* btrfs_find_delalloc_in_range.
2713+
*/
2714+
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
26982715
delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
26992716
delalloc_cached_state,
27002717
&delalloc_start,
27012718
&delalloc_end);
2719+
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
27022720
if (!delalloc)
27032721
break;
27042722

@@ -2866,15 +2884,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
28662884
u64 start, u64 len)
28672885
{
28682886
const u64 ino = btrfs_ino(inode);
2869-
struct extent_state *cached_state = NULL;
28702887
struct extent_state *delalloc_cached_state = NULL;
28712888
struct btrfs_path *path;
28722889
struct fiemap_cache cache = { 0 };
28732890
struct btrfs_backref_share_check_ctx *backref_ctx;
28742891
u64 last_extent_end;
28752892
u64 prev_extent_end;
2876-
u64 lockstart;
2877-
u64 lockend;
2893+
u64 range_start;
2894+
u64 range_end;
2895+
const u64 sectorsize = inode->root->fs_info->sectorsize;
28782896
bool stopped = false;
28792897
int ret;
28802898

@@ -2885,20 +2903,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
28852903
goto out;
28862904
}
28872905

2888-
lockstart = round_down(start, inode->root->fs_info->sectorsize);
2889-
lockend = round_up(start + len, inode->root->fs_info->sectorsize);
2890-
prev_extent_end = lockstart;
2906+
range_start = round_down(start, sectorsize);
2907+
range_end = round_up(start + len, sectorsize);
2908+
prev_extent_end = range_start;
28912909

28922910
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
2893-
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
28942911

28952912
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
28962913
if (ret < 0)
28972914
goto out_unlock;
28982915
btrfs_release_path(path);
28992916

29002917
path->reada = READA_FORWARD;
2901-
ret = fiemap_search_slot(inode, path, lockstart);
2918+
ret = fiemap_search_slot(inode, path, range_start);
29022919
if (ret < 0) {
29032920
goto out_unlock;
29042921
} else if (ret > 0) {
@@ -2910,7 +2927,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
29102927
goto check_eof_delalloc;
29112928
}
29122929

2913-
while (prev_extent_end < lockend) {
2930+
while (prev_extent_end < range_end) {
29142931
struct extent_buffer *leaf = path->nodes[0];
29152932
struct btrfs_file_extent_item *ei;
29162933
struct btrfs_key key;
@@ -2933,19 +2950,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
29332950
* The first iteration can leave us at an extent item that ends
29342951
* before our range's start. Move to the next item.
29352952
*/
2936-
if (extent_end <= lockstart)
2953+
if (extent_end <= range_start)
29372954
goto next_item;
29382955

29392956
backref_ctx->curr_leaf_bytenr = leaf->start;
29402957

29412958
/* We have in implicit hole (NO_HOLES feature enabled). */
29422959
if (prev_extent_end < key.offset) {
2943-
const u64 range_end = min(key.offset, lockend) - 1;
2960+
const u64 hole_end = min(key.offset, range_end) - 1;
29442961

29452962
ret = fiemap_process_hole(inode, fieinfo, &cache,
29462963
&delalloc_cached_state,
29472964
backref_ctx, 0, 0, 0,
2948-
prev_extent_end, range_end);
2965+
prev_extent_end, hole_end);
29492966
if (ret < 0) {
29502967
goto out_unlock;
29512968
} else if (ret > 0) {
@@ -2955,7 +2972,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
29552972
}
29562973

29572974
/* We've reached the end of the fiemap range, stop. */
2958-
if (key.offset >= lockend) {
2975+
if (key.offset >= range_end) {
29592976
stopped = true;
29602977
break;
29612978
}
@@ -3049,29 +3066,41 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
30493066
btrfs_free_path(path);
30503067
path = NULL;
30513068

3052-
if (!stopped && prev_extent_end < lockend) {
3069+
if (!stopped && prev_extent_end < range_end) {
30533070
ret = fiemap_process_hole(inode, fieinfo, &cache,
30543071
&delalloc_cached_state, backref_ctx,
3055-
0, 0, 0, prev_extent_end, lockend - 1);
3072+
0, 0, 0, prev_extent_end, range_end - 1);
30563073
if (ret < 0)
30573074
goto out_unlock;
3058-
prev_extent_end = lockend;
3075+
prev_extent_end = range_end;
30593076
}
30603077

30613078
if (cache.cached && cache.offset + cache.len >= last_extent_end) {
30623079
const u64 i_size = i_size_read(&inode->vfs_inode);
30633080

30643081
if (prev_extent_end < i_size) {
3082+
struct extent_state *cached_state = NULL;
30653083
u64 delalloc_start;
30663084
u64 delalloc_end;
3085+
u64 lockstart;
3086+
u64 lockend;
30673087
bool delalloc;
30683088

3089+
lockstart = round_down(prev_extent_end, sectorsize);
3090+
lockend = round_up(i_size, sectorsize);
3091+
3092+
/*
3093+
* See the comment in fiemap_process_hole as to why
3094+
* we're doing the locking here.
3095+
*/
3096+
lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
30693097
delalloc = btrfs_find_delalloc_in_range(inode,
30703098
prev_extent_end,
30713099
i_size - 1,
30723100
&delalloc_cached_state,
30733101
&delalloc_start,
30743102
&delalloc_end);
3103+
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
30753104
if (!delalloc)
30763105
cache.flags |= FIEMAP_EXTENT_LAST;
30773106
} else {
@@ -3082,7 +3111,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
30823111
ret = emit_last_fiemap_cache(fieinfo, &cache);
30833112

30843113
out_unlock:
3085-
unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
30863114
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
30873115
out:
30883116
free_extent_state(delalloc_cached_state);

0 commit comments

Comments
 (0)