Skip to content

Commit 7505aa1

Browse files
committed
Merge tag 'for-6.8-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

 - fix freeing allocated id for anon dev when snapshot creation fails

 - fiemap fixes:

    - followup for a recent deadlock fix, ranges that fiemap can access
      can still race with ordered extent completion

    - make sure fiemap with SYNC flag does not race with writes

* tag 'for-6.8-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix double free of anonymous device after snapshot creation failure
  btrfs: ensure fiemap doesn't race with writes when FIEMAP_FLAG_SYNC is given
  btrfs: fix race between ordered extent completion and fiemap
2 parents 3aec97e + e2b54ea commit 7505aa1

File tree

6 files changed

+139
-35
lines changed

6 files changed

+139
-35
lines changed

fs/btrfs/disk-io.c

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,12 +1307,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
13071307
*
13081308
* @objectid: root id
13091309
* @anon_dev: preallocated anonymous block device number for new roots,
1310-
* pass 0 for new allocation.
1310+
* pass NULL for a new allocation.
13111311
* @check_ref: whether to check root item references. If true, return -ENOENT
13121312
* for orphan roots
13131313
*/
13141314
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
1315-
u64 objectid, dev_t anon_dev,
1315+
u64 objectid, dev_t *anon_dev,
13161316
bool check_ref)
13171317
{
13181318
struct btrfs_root *root;
@@ -1342,9 +1342,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
13421342
* that common but still possible. In that case, we just need
13431343
* to free the anon_dev.
13441344
*/
1345-
if (unlikely(anon_dev)) {
1346-
free_anon_bdev(anon_dev);
1347-
anon_dev = 0;
1345+
if (unlikely(anon_dev && *anon_dev)) {
1346+
free_anon_bdev(*anon_dev);
1347+
*anon_dev = 0;
13481348
}
13491349

13501350
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
@@ -1366,7 +1366,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
13661366
goto fail;
13671367
}
13681368

1369-
ret = btrfs_init_fs_root(root, anon_dev);
1369+
ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0);
13701370
if (ret)
13711371
goto fail;
13721372

@@ -1402,7 +1402,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
14021402
* root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
14031403
* and once again by our caller.
14041404
*/
1405-
if (anon_dev)
1405+
if (anon_dev && *anon_dev)
14061406
root->anon_dev = 0;
14071407
btrfs_put_root(root);
14081408
return ERR_PTR(ret);
@@ -1418,19 +1418,19 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
14181418
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
14191419
u64 objectid, bool check_ref)
14201420
{
1421-
return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
1421+
return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref);
14221422
}
14231423

14241424
/*
14251425
* Get in-memory reference of a root structure, created as new, optionally pass
14261426
* the anonymous block device id
14271427
*
14281428
* @objectid: tree objectid
1429-
* @anon_dev: if zero, allocate a new anonymous block device or use the
1430-
* parameter value
1429+
* @anon_dev: if NULL, allocate a new anonymous block device; otherwise use
1430+
* the preallocated device number it points to
14311431
*/
14321432
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
1433-
u64 objectid, dev_t anon_dev)
1433+
u64 objectid, dev_t *anon_dev)
14341434
{
14351435
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
14361436
}

fs/btrfs/disk-io.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
6161
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
6262
u64 objectid, bool check_ref);
6363
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
64-
u64 objectid, dev_t anon_dev);
64+
u64 objectid, dev_t *anon_dev);
6565
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
6666
struct btrfs_path *path,
6767
u64 objectid);

fs/btrfs/extent_io.c

Lines changed: 104 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2480,6 +2480,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
24802480
struct fiemap_cache *cache,
24812481
u64 offset, u64 phys, u64 len, u32 flags)
24822482
{
2483+
u64 cache_end;
24832484
int ret = 0;
24842485

24852486
/* Set at the end of extent_fiemap(). */
@@ -2489,15 +2490,102 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
24892490
goto assign;
24902491

24912492
/*
2492-
* Sanity check, extent_fiemap() should have ensured that new
2493-
* fiemap extent won't overlap with cached one.
2494-
* Not recoverable.
2493+
* When iterating the extents of the inode, at extent_fiemap(), we may
2494+
* find an extent that starts at an offset behind the end offset of the
2495+
* previous extent we processed. This happens if fiemap is called
2496+
* without FIEMAP_FLAG_SYNC and there are ordered extents completing
2497+
* while we call btrfs_next_leaf() (through fiemap_next_leaf_item()).
24952498
*
2496-
* NOTE: Physical address can overlap, due to compression
2499+
* For example we are in leaf X processing its last item, which is the
2500+
* file extent item for file range [512K, 1M[, and after
2501+
* btrfs_next_leaf() releases the path, there's an ordered extent that
2502+
* completes for the file range [768K, 2M[, and that results in trimming
2503+
* the file extent item so that it now corresponds to the file range
2504+
* [512K, 768K[ and a new file extent item is inserted for the file
2505+
* range [768K, 2M[, which may end up as the last item of leaf X or as
2506+
* the first item of the next leaf - in either case btrfs_next_leaf()
2507+
* will leave us with a path pointing to the new extent item, for the
2508+
* file range [768K, 2M[, since that's the first key that follows the
2509+
* last one we processed. So in order not to report overlapping extents
2510+
* to user space, we trim the length of the previously cached extent and
2511+
* emit it.
2512+
*
2513+
* Upon calling btrfs_next_leaf() we may also find an extent with an
2514+
* offset smaller than or equal to cache->offset, and this happens
2515+
* when we had a hole or prealloc extent with several delalloc ranges in
2516+
* it, but after btrfs_next_leaf() released the path, delalloc was
2517+
* flushed and the resulting ordered extents were completed, so we can
2518+
* now have found a file extent item for an offset that is smaller than
2519+
* or equal to what we have in cache->offset. We deal with this as
2520+
* described below.
24972521
*/
2498-
if (cache->offset + cache->len > offset) {
2499-
WARN_ON(1);
2500-
return -EINVAL;
2522+
cache_end = cache->offset + cache->len;
2523+
if (cache_end > offset) {
2524+
if (offset == cache->offset) {
2525+
/*
2526+
* We cached a delalloc range (found in the io tree) for
2527+
* a hole or prealloc extent and we have now found a
2528+
* file extent item for the same offset. What we have
2529+
* now is more recent and up to date, so discard what
2530+
* we had in the cache and use what we have just found.
2531+
*/
2532+
goto assign;
2533+
} else if (offset > cache->offset) {
2534+
/*
2535+
* The extent range we previously found ends after the
2536+
* offset of the file extent item we found and that
2537+
* offset falls somewhere in the middle of that previous
2538+
* extent range. So adjust the range we previously found
2539+
* to end at the offset of the file extent item we have
2540+
* just found, since this extent is more up to date.
2541+
* Emit that adjusted range and cache the file extent
2542+
* item we have just found. This corresponds to the case
2543+
* where a previously found file extent item was split
2544+
* due to an ordered extent completing.
2545+
*/
2546+
cache->len = offset - cache->offset;
2547+
goto emit;
2548+
} else {
2549+
const u64 range_end = offset + len;
2550+
2551+
/*
2552+
* The offset of the file extent item we have just found
2553+
* is behind the cached offset. This means we were
2554+
* processing a hole or prealloc extent for which we
2555+
* have found delalloc ranges (in the io tree), so what
2556+
* we have in the cache is the last delalloc range we
2557+
* found while the file extent item we found can be
2558+
* either for a whole delalloc range we previously
2559+
* emitted or only a part of that range.
2560+
*
2561+
* We have two cases here:
2562+
*
2563+
* 1) The file extent item's range ends at or behind the
2564+
* cached extent's end. In this case just ignore the
2565+
* current file extent item because we don't want to
2566+
* overlap with previous ranges that may have been
2567+
* emitted already;
2568+
*
2569+
* 2) The file extent item starts behind the currently
2570+
* cached extent but its end offset goes beyond the
2571+
* end offset of the cached extent. We don't want to
2572+
* overlap with a previous range that may have been
2573+
* emitted already, so we emit the currently cached
2574+
* extent and then partially store the current file
2575+
* extent item's range in the cache, for the subrange
2576+
* going from the cached extent's end to the end of the
2577+
* file extent item.
2578+
*/
2579+
if (range_end <= cache_end)
2580+
return 0;
2581+
2582+
if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC)))
2583+
phys += cache_end - offset;
2584+
2585+
offset = cache_end;
2586+
len = range_end - cache_end;
2587+
goto emit;
2588+
}
25012589
}
25022590

25032591
/*
@@ -2517,6 +2605,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
25172605
return 0;
25182606
}
25192607

2608+
emit:
25202609
/* Not mergeable, need to submit cached one */
25212610
ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
25222611
cache->len, cache->flags);
@@ -2907,17 +2996,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
29072996
range_end = round_up(start + len, sectorsize);
29082997
prev_extent_end = range_start;
29092998

2910-
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
2911-
29122999
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
29133000
if (ret < 0)
2914-
goto out_unlock;
3001+
goto out;
29153002
btrfs_release_path(path);
29163003

29173004
path->reada = READA_FORWARD;
29183005
ret = fiemap_search_slot(inode, path, range_start);
29193006
if (ret < 0) {
2920-
goto out_unlock;
3007+
goto out;
29213008
} else if (ret > 0) {
29223009
/*
29233010
* No file extent item found, but we may have delalloc between
@@ -2964,7 +3051,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
29643051
backref_ctx, 0, 0, 0,
29653052
prev_extent_end, hole_end);
29663053
if (ret < 0) {
2967-
goto out_unlock;
3054+
goto out;
29683055
} else if (ret > 0) {
29693056
/* fiemap_fill_next_extent() told us to stop. */
29703057
stopped = true;
@@ -3020,7 +3107,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
30203107
extent_gen,
30213108
backref_ctx);
30223109
if (ret < 0)
3023-
goto out_unlock;
3110+
goto out;
30243111
else if (ret > 0)
30253112
flags |= FIEMAP_EXTENT_SHARED;
30263113
}
@@ -3031,7 +3118,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
30313118
}
30323119

30333120
if (ret < 0) {
3034-
goto out_unlock;
3121+
goto out;
30353122
} else if (ret > 0) {
30363123
/* fiemap_fill_next_extent() told us to stop. */
30373124
stopped = true;
@@ -3042,12 +3129,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
30423129
next_item:
30433130
if (fatal_signal_pending(current)) {
30443131
ret = -EINTR;
3045-
goto out_unlock;
3132+
goto out;
30463133
}
30473134

30483135
ret = fiemap_next_leaf_item(inode, path);
30493136
if (ret < 0) {
3050-
goto out_unlock;
3137+
goto out;
30513138
} else if (ret > 0) {
30523139
/* No more file extent items for this inode. */
30533140
break;
@@ -3071,7 +3158,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
30713158
&delalloc_cached_state, backref_ctx,
30723159
0, 0, 0, prev_extent_end, range_end - 1);
30733160
if (ret < 0)
3074-
goto out_unlock;
3161+
goto out;
30753162
prev_extent_end = range_end;
30763163
}
30773164

@@ -3109,9 +3196,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
31093196
}
31103197

31113198
ret = emit_last_fiemap_cache(fieinfo, &cache);
3112-
3113-
out_unlock:
3114-
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
31153199
out:
31163200
free_extent_state(delalloc_cached_state);
31173201
btrfs_free_backref_share_ctx(backref_ctx);

fs/btrfs/inode.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7835,6 +7835,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
78357835
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
78367836
u64 start, u64 len)
78377837
{
7838+
struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
78387839
int ret;
78397840

78407841
ret = fiemap_prep(inode, fieinfo, start, &len, 0);
@@ -7860,7 +7861,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
78607861
return ret;
78617862
}
78627863

7863-
return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
7864+
btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED);
7865+
7866+
/*
7867+
* We did an initial flush to avoid holding the inode's lock while
7868+
* triggering writeback and waiting for the completion of IO and ordered
7869+
* extents. Now after we locked the inode we do it again, because it's
7870+
* possible a new write may have happened in between those two steps.
7871+
*/
7872+
if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
7873+
ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX);
7874+
if (ret) {
7875+
btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
7876+
return ret;
7877+
}
7878+
}
7879+
7880+
ret = extent_fiemap(btrfs_inode, fieinfo, start, len);
7881+
btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
7882+
7883+
return ret;
78647884
}
78657885

78667886
static int btrfs_writepages(struct address_space *mapping,

fs/btrfs/ioctl.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
721721
free_extent_buffer(leaf);
722722
leaf = NULL;
723723

724-
new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
724+
new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
725725
if (IS_ERR(new_root)) {
726726
ret = PTR_ERR(new_root);
727727
btrfs_abort_transaction(trans, ret);

fs/btrfs/transaction.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1834,7 +1834,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
18341834
}
18351835

18361836
key.offset = (u64)-1;
1837-
pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
1837+
pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev);
18381838
if (IS_ERR(pending->snap)) {
18391839
ret = PTR_ERR(pending->snap);
18401840
pending->snap = NULL;

0 commit comments

Comments
 (0)