Skip to content

Commit 0deab3f

Browse files
fdmanana authored and kdave committed
btrfs: fix -ENOSPC mmap write failure on NOCOW files/extents
If we attempt a mmap write into a NOCOW file or a prealloc extent when
there is no more available data space (or unallocated space to allocate a
new data block group) and we can do a NOCOW write (there are no reflinks
for the target extent or snapshots), we always fail due to -ENOSPC, unlike
for the regular buffered write and direct IO paths where we check that we
can do a NOCOW write in case we can't reserve data space.

Simple reproducer:

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/sdi
  MNT=/mnt/sdi

  umount $DEV &> /dev/null
  mkfs.btrfs -f -b $((512 * 1024 * 1024)) $DEV
  mount $DEV $MNT

  touch $MNT/foobar
  # Make it a NOCOW file.
  chattr +C $MNT/foobar

  # Add initial data to file.
  xfs_io -c "pwrite -S 0xab 0 1M" $MNT/foobar

  # Fill all the remaining data space and unallocated space with data.
  dd if=/dev/zero of=$MNT/filler bs=4K &> /dev/null

  # Overwrite the file with a mmap write. Should succeed.
  xfs_io -c "mmap -w 0 1M" \
         -c "mwrite -S 0xcd 0 1M" \
         -c "munmap" \
         $MNT/foobar

  # Unmount, mount again and verify the new data was persisted.
  umount $MNT
  mount $DEV $MNT

  od -A d -t x1 $MNT/foobar

  umount $MNT

Running this:

  $ ./test.sh
  (...)
  wrote 1048576/1048576 bytes at offset 0
  1 MiB, 256 ops; 0.0008 sec (1.188 GiB/sec and 311435.5231 ops/sec)
  ./test.sh: line 24: 234865 Bus error xfs_io -c "mmap -w 0 1M" -c "mwrite -S 0xcd 0 1M" -c "munmap" $MNT/foobar
  0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab
  *
  1048576

Fix this by not failing in case we can't allocate data space and we can
NOCOW into the target extent - reserving only metadata space in this case.

After this change the test passes:

  $ ./test.sh
  (...)
  wrote 1048576/1048576 bytes at offset 0
  1 MiB, 256 ops; 0.0007 sec (1.262 GiB/sec and 330749.3540 ops/sec)
  0000000 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd
  *
  1048576

A test case for fstests will be added soon.

Reviewed-by: Qu Wenruo <[email protected]>
Signed-off-by: Filipe Manana <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent cdc6bf9 commit 0deab3f

File tree

1 file changed

+51
-8
lines changed

1 file changed

+51
-8
lines changed

fs/btrfs/file.c

Lines changed: 51 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1841,6 +1841,7 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
18411841
loff_t size;
18421842
size_t fsize = folio_size(folio);
18431843
int ret;
1844+
bool only_release_metadata = false;
18441845
u64 reserved_space;
18451846
u64 page_start;
18461847
u64 page_end;
@@ -1861,10 +1862,34 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
18611862
* end up waiting indefinitely to get a lock on the page currently
18621863
* being processed by btrfs_page_mkwrite() function.
18631864
*/
1864-
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
1865-
page_start, reserved_space);
1866-
if (ret < 0)
1865+
ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved,
1866+
page_start, reserved_space, false);
1867+
if (ret < 0) {
1868+
size_t write_bytes = reserved_space;
1869+
1870+
if (btrfs_check_nocow_lock(BTRFS_I(inode), page_start,
1871+
&write_bytes, false) <= 0)
1872+
goto out_noreserve;
1873+
1874+
only_release_metadata = true;
1875+
1876+
/*
1877+
* Can't write the whole range, there may be shared extents or
1878+
* holes in the range, bail out with @only_release_metadata set
1879+
* to true so that we unlock the nocow lock before returning the
1880+
* error.
1881+
*/
1882+
if (write_bytes < reserved_space)
1883+
goto out_noreserve;
1884+
}
1885+
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), reserved_space,
1886+
reserved_space, false);
1887+
if (ret < 0) {
1888+
if (!only_release_metadata)
1889+
btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
1890+
page_start, reserved_space);
18671891
goto out_noreserve;
1892+
}
18681893

18691894
ret = file_update_time(vmf->vma->vm_file);
18701895
if (ret < 0)
@@ -1905,10 +1930,16 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
19051930
if (folio_contains(folio, (size - 1) >> PAGE_SHIFT)) {
19061931
reserved_space = round_up(size - page_start, fs_info->sectorsize);
19071932
if (reserved_space < fsize) {
1933+
const u64 to_free = fsize - reserved_space;
1934+
19081935
end = page_start + reserved_space - 1;
1909-
btrfs_delalloc_release_space(BTRFS_I(inode),
1910-
data_reserved, end + 1,
1911-
fsize - reserved_space, true);
1936+
if (only_release_metadata)
1937+
btrfs_delalloc_release_metadata(BTRFS_I(inode),
1938+
to_free, true);
1939+
else
1940+
btrfs_delalloc_release_space(BTRFS_I(inode),
1941+
data_reserved, end + 1,
1942+
to_free, true);
19121943
}
19131944
}
19141945

@@ -1945,10 +1976,16 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
19451976

19461977
btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
19471978

1979+
if (only_release_metadata)
1980+
btrfs_set_extent_bit(io_tree, page_start, end, EXTENT_NORESERVE,
1981+
&cached_state);
1982+
19481983
btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state);
19491984
up_read(&BTRFS_I(inode)->i_mmap_lock);
19501985

19511986
btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
1987+
if (only_release_metadata)
1988+
btrfs_check_nocow_unlock(BTRFS_I(inode));
19521989
sb_end_pagefault(inode->i_sb);
19531990
extent_changeset_free(data_reserved);
19541991
return VM_FAULT_LOCKED;
@@ -1958,10 +1995,16 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
19581995
up_read(&BTRFS_I(inode)->i_mmap_lock);
19591996
out:
19601997
btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
1961-
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
1962-
reserved_space, true);
1998+
if (only_release_metadata)
1999+
btrfs_delalloc_release_metadata(BTRFS_I(inode), reserved_space, true);
2000+
else
2001+
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
2002+
page_start, reserved_space, true);
19632003
extent_changeset_free(data_reserved);
19642004
out_noreserve:
2005+
if (only_release_metadata)
2006+
btrfs_check_nocow_unlock(BTRFS_I(inode));
2007+
19652008
sb_end_pagefault(inode->i_sb);
19662009

19672010
if (ret < 0)

0 commit comments

Comments
 (0)