Skip to content

Commit 5dbb75e

Browse files
fdmanana authored and kdave committed
btrfs: fix RWF_NOWAIT writes blocking on extent locks and waiting for IO
A RWF_NOWAIT write is not supposed to wait on filesystem locks that can be held for a long time or for ongoing IO to complete.

However, when calling check_can_nocow(), if the inode has prealloc extents or has the NOCOW flag set, we can block on extent (file range) locks through the call to btrfs_lock_and_flush_ordered_range(). Such a lock can take a significant amount of time to become available. For example, a fiemap task may be running, iterating through the entire file range, checking all extents and doing backref walking to determine if they are shared, or a readpage operation may be in progress.

Also, at btrfs_lock_and_flush_ordered_range(), called by check_can_nocow(), after locking the file range we wait for any existing ordered extent that is in progress to complete. This is another operation that can take a significant amount of time and defeats the purpose of RWF_NOWAIT.

So fix this by trying to lock the file range and, if it's currently locked, returning -EAGAIN to user space. If we are able to lock the file range without waiting and there is an ordered extent in the range, return -EAGAIN as well, instead of waiting for it to complete. Finally, don't bother trying to lock the snapshot lock of the root when attempting a RWF_NOWAIT write, as that is only important for buffered writes.

Fixes: edf064e ("btrfs: nowait aio support")
Signed-off-by: Filipe Manana <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent 260a633 commit 5dbb75e

File tree

1 file changed

+26
-11
lines changed

1 file changed

+26
-11
lines changed

fs/btrfs/file.c

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,35 +1533,51 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
15331533
}
15341534

15351535
static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
1536-
size_t *write_bytes)
1536+
size_t *write_bytes, bool nowait)
15371537
{
15381538
struct btrfs_fs_info *fs_info = inode->root->fs_info;
15391539
struct btrfs_root *root = inode->root;
15401540
u64 lockstart, lockend;
15411541
u64 num_bytes;
15421542
int ret;
15431543

1544-
if (!btrfs_drew_try_write_lock(&root->snapshot_lock))
1544+
if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
15451545
return -EAGAIN;
15461546

15471547
lockstart = round_down(pos, fs_info->sectorsize);
15481548
lockend = round_up(pos + *write_bytes,
15491549
fs_info->sectorsize) - 1;
1550+
num_bytes = lockend - lockstart + 1;
15501551

1551-
btrfs_lock_and_flush_ordered_range(inode, lockstart,
1552-
lockend, NULL);
1552+
if (nowait) {
1553+
struct btrfs_ordered_extent *ordered;
1554+
1555+
if (!try_lock_extent(&inode->io_tree, lockstart, lockend))
1556+
return -EAGAIN;
1557+
1558+
ordered = btrfs_lookup_ordered_range(inode, lockstart,
1559+
num_bytes);
1560+
if (ordered) {
1561+
btrfs_put_ordered_extent(ordered);
1562+
ret = -EAGAIN;
1563+
goto out_unlock;
1564+
}
1565+
} else {
1566+
btrfs_lock_and_flush_ordered_range(inode, lockstart,
1567+
lockend, NULL);
1568+
}
15531569

1554-
num_bytes = lockend - lockstart + 1;
15551570
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
15561571
NULL, NULL, NULL);
15571572
if (ret <= 0) {
15581573
ret = 0;
1559-
btrfs_drew_write_unlock(&root->snapshot_lock);
1574+
if (!nowait)
1575+
btrfs_drew_write_unlock(&root->snapshot_lock);
15601576
} else {
15611577
*write_bytes = min_t(size_t, *write_bytes ,
15621578
num_bytes - pos + lockstart);
15631579
}
1564-
1580+
out_unlock:
15651581
unlock_extent(&inode->io_tree, lockstart, lockend);
15661582

15671583
return ret;
@@ -1633,7 +1649,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
16331649
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
16341650
BTRFS_INODE_PREALLOC)) &&
16351651
check_can_nocow(BTRFS_I(inode), pos,
1636-
&write_bytes) > 0) {
1652+
&write_bytes, false) > 0) {
16371653
/*
16381654
* For nodata cow case, no need to reserve
16391655
* data space.
@@ -1912,12 +1928,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
19121928
*/
19131929
if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
19141930
BTRFS_INODE_PREALLOC)) ||
1915-
check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes) <= 0) {
1931+
check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes,
1932+
true) <= 0) {
19161933
inode_unlock(inode);
19171934
return -EAGAIN;
19181935
}
1919-
/* check_can_nocow() locks the snapshot lock on success */
1920-
btrfs_drew_write_unlock(&root->snapshot_lock);
19211936
/*
19221937
* There are holes in the range or parts of the range that must
19231938
* be COWed (shared extents, RO block groups, etc), so just bail

0 commit comments

Comments
 (0)