Skip to content

Commit 418b090

Browse files
fdmanana authored and kdave committed
btrfs: ensure fiemap doesn't race with writes when FIEMAP_FLAG_SYNC is given
When FIEMAP_FLAG_SYNC is given to fiemap the expectation is that there are no concurrent writes and we get a stable view of the inode's extent layout.

When the flag is given we flush all IO (and wait for ordered extents to complete) and then lock the inode in shared mode, however that leaves open the possibility that a write might happen right after the flushing and before locking the inode.

So fix this by flushing again after locking the inode - we leave the initial flushing before locking the inode to avoid holding the lock and blocking other RO operations while waiting for IO and ordered extents to complete. The second flushing while holding the inode's lock will most of the time do nothing or very little since the time window for new writes to have happened is small.

Reviewed-by: Josef Bacik <[email protected]>
Signed-off-by: Filipe Manana <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent a1a4a9c commit 418b090

File tree

2 files changed

+29
-14
lines changed

2 files changed

+29
-14
lines changed

fs/btrfs/extent_io.c

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2996,17 +2996,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
29962996
range_end = round_up(start + len, sectorsize);
29972997
prev_extent_end = range_start;
29982998

2999-
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
3000-
30012999
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
30023000
if (ret < 0)
3003-
goto out_unlock;
3001+
goto out;
30043002
btrfs_release_path(path);
30053003

30063004
path->reada = READA_FORWARD;
30073005
ret = fiemap_search_slot(inode, path, range_start);
30083006
if (ret < 0) {
3009-
goto out_unlock;
3007+
goto out;
30103008
} else if (ret > 0) {
30113009
/*
30123010
* No file extent item found, but we may have delalloc between
@@ -3053,7 +3051,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
30533051
backref_ctx, 0, 0, 0,
30543052
prev_extent_end, hole_end);
30553053
if (ret < 0) {
3056-
goto out_unlock;
3054+
goto out;
30573055
} else if (ret > 0) {
30583056
/* fiemap_fill_next_extent() told us to stop. */
30593057
stopped = true;
@@ -3109,7 +3107,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
31093107
extent_gen,
31103108
backref_ctx);
31113109
if (ret < 0)
3112-
goto out_unlock;
3110+
goto out;
31133111
else if (ret > 0)
31143112
flags |= FIEMAP_EXTENT_SHARED;
31153113
}
@@ -3120,7 +3118,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
31203118
}
31213119

31223120
if (ret < 0) {
3123-
goto out_unlock;
3121+
goto out;
31243122
} else if (ret > 0) {
31253123
/* fiemap_fill_next_extent() told us to stop. */
31263124
stopped = true;
@@ -3131,12 +3129,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
31313129
next_item:
31323130
if (fatal_signal_pending(current)) {
31333131
ret = -EINTR;
3134-
goto out_unlock;
3132+
goto out;
31353133
}
31363134

31373135
ret = fiemap_next_leaf_item(inode, path);
31383136
if (ret < 0) {
3139-
goto out_unlock;
3137+
goto out;
31403138
} else if (ret > 0) {
31413139
/* No more file extent items for this inode. */
31423140
break;
@@ -3160,7 +3158,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
31603158
&delalloc_cached_state, backref_ctx,
31613159
0, 0, 0, prev_extent_end, range_end - 1);
31623160
if (ret < 0)
3163-
goto out_unlock;
3161+
goto out;
31643162
prev_extent_end = range_end;
31653163
}
31663164

@@ -3198,9 +3196,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
31983196
}
31993197

32003198
ret = emit_last_fiemap_cache(fieinfo, &cache);
3201-
3202-
out_unlock:
3203-
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
32043199
out:
32053200
free_extent_state(delalloc_cached_state);
32063201
btrfs_free_backref_share_ctx(backref_ctx);

fs/btrfs/inode.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7835,6 +7835,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
78357835
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
78367836
u64 start, u64 len)
78377837
{
7838+
struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
78387839
int ret;
78397840

78407841
ret = fiemap_prep(inode, fieinfo, start, &len, 0);
@@ -7860,7 +7861,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
78607861
return ret;
78617862
}
78627863

7863-
return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
7864+
btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED);
7865+
7866+
/*
7867+
* We did an initial flush to avoid holding the inode's lock while
7868+
* triggering writeback and waiting for the completion of IO and ordered
7869+
* extents. Now after we locked the inode we do it again, because it's
7870+
* possible a new write may have happened in between those two steps.
7871+
*/
7872+
if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
7873+
ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX);
7874+
if (ret) {
7875+
btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
7876+
return ret;
7877+
}
7878+
}
7879+
7880+
ret = extent_fiemap(btrfs_inode, fieinfo, start, len);
7881+
btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
7882+
7883+
return ret;
78647884
}
78657885

78667886
static int btrfs_writepages(struct address_space *mapping,

0 commit comments

Comments
 (0)