Skip to content

Commit 70e97e5

Browse files
adam900710kdave
authored andcommitted
btrfs: keep folios locked inside run_delalloc_nocow()
[BUG] There is a very low chance that DEBUG_WARN() inside btrfs_writepage_cow_fixup() can be triggered when CONFIG_BTRFS_EXPERIMENTAL is enabled. This only happens after run_delalloc_nocow() failed. Unfortunately I haven't hit it for a while thus no real world dmesg for now. [CAUSE] There is a race window where after run_delalloc_nocow() failed, error handling can race with writeback thread. Before we hit run_delalloc_nocow(), there is an inode with the following dirty pages: (4K page size, 4K block size, no large folio) 0 4K 8K 12K 16K |/////////|///////////|///////////|////////////| The inode also have NODATACOW flag, and the above dirty range will go through different extents during run_delalloc_range(): 0 4K 8K 12K 16K | NOCOW | COW | COW | NOCOW | The race happen like this: writeback thread A | writeback thread B ----------------------------------+-------------------------------------- Writeback for folio 0 | run_delalloc_nocow() | |- nocow_one_range() | | For range [0, 4K), ret = 0 | | | |- fallback_to_cow() | | For range [4K, 8K), ret = 0 | | Folio 4K *UNLOCKED* | | | Writeback for folio 4K |- fallback_to_cow() | extent_writepage() | For range [8K, 12K), failure | |- writepage_delalloc() | | | |- btrfs_cleanup_ordered_extents()| | |- btrfs_folio_clear_ordered() | | | Folio 0 still locked, safe | | | | | Ordered extent already allocated. | | | Nothing to do. | | |- extent_writepage_io() | | |- btrfs_writepage_cow_fixup() |- btrfs_folio_clear_ordered() | | Folio 4K hold by thread B, | | UNSAFE! | |- btrfs_test_ordered() | | Cleared by thread A, | | | |- DEBUG_WARN(); This is only possible after run_delalloc_nocow() failure, as cow_file_range() will keep all folios and io tree range locked, until everything is finished or after error handling. The root cause is we allow fallback_to_cow() and nocow_one_range() to unlock the folios after a successful run, so that during error handling we're no longer safe to use btrfs_cleanup_ordered_extents() as the folios are already unlocked. [FIX] - Make fallback_to_cow() and nocow_one_range() to keep folios locked after a successful run For fallback_to_cow() we can pass COW_FILE_RANGE_KEEP_LOCKED flag into cow_file_range(). For nocow_one_range() we have to remove the PAGE_UNLOCK flag from extent_clear_unlock_delalloc(). - Unlock folios if everything is fine in run_delalloc_nocow() - Use extent_clear_unlock_delalloc() to handle range [@start, @cur_offset) inside run_delalloc_nocow() Since folios are still locked, we do not need cleanup_dirty_folios() to do the cleanup. extent_clear_unlock_delalloc() with "PAGE_START_WRIBACK | PAGE_END_WRITEBACK" will clear the dirty flags. - Remove cleanup_dirty_folios() Signed-off-by: Qu Wenruo <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent 93f5b58 commit 70e97e5

File tree

1 file changed

+22
-51
lines changed

1 file changed

+22
-51
lines changed

fs/btrfs/inode.c

Lines changed: 22 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1772,9 +1772,15 @@ static int fallback_to_cow(struct btrfs_inode *inode,
17721772
* Don't try to create inline extents, as a mix of inline extent that
17731773
* is written out and unlocked directly and a normal NOCOW extent
17741774
* doesn't work.
1775+
*
1776+
* And here we do not unlock the folio after a successful run.
1777+
* The folios will be unlocked after everything is finished, or by error handling.
1778+
*
1779+
* This is to ensure error handling won't need to clear dirty/ordered flags without
1780+
* a locked folio, which can race with writeback.
17751781
*/
17761782
ret = cow_file_range(inode, locked_folio, start, end, NULL,
1777-
COW_FILE_RANGE_NO_INLINE);
1783+
COW_FILE_RANGE_NO_INLINE | COW_FILE_RANGE_KEEP_LOCKED);
17781784
ASSERT(ret != 1);
17791785
return ret;
17801786
}
@@ -1917,53 +1923,6 @@ static int can_nocow_file_extent(struct btrfs_path *path,
19171923
return ret < 0 ? ret : can_nocow;
19181924
}
19191925

1920-
/*
1921-
* Cleanup the dirty folios which will never be submitted due to error.
1922-
*
1923-
* When running a delalloc range, we may need to split the ranges (due to
1924-
* fragmentation or NOCOW). If we hit an error in the later part, we will error
1925-
* out and previously successfully executed range will never be submitted, thus
1926-
* we have to cleanup those folios by clearing their dirty flag, starting and
1927-
* finishing the writeback.
1928-
*/
1929-
static void cleanup_dirty_folios(struct btrfs_inode *inode,
1930-
struct folio *locked_folio,
1931-
u64 start, u64 end, int error)
1932-
{
1933-
struct btrfs_fs_info *fs_info = inode->root->fs_info;
1934-
struct address_space *mapping = inode->vfs_inode.i_mapping;
1935-
pgoff_t start_index = start >> PAGE_SHIFT;
1936-
pgoff_t end_index = end >> PAGE_SHIFT;
1937-
u32 len;
1938-
1939-
ASSERT(end + 1 - start < U32_MAX);
1940-
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
1941-
IS_ALIGNED(end + 1, fs_info->sectorsize));
1942-
len = end + 1 - start;
1943-
1944-
/*
1945-
* Handle the locked folio first.
1946-
* The btrfs_folio_clamp_*() helpers can handle range out of the folio case.
1947-
*/
1948-
btrfs_folio_clamp_finish_io(fs_info, locked_folio, start, len);
1949-
1950-
for (pgoff_t index = start_index; index <= end_index; index++) {
1951-
struct folio *folio;
1952-
1953-
/* Already handled at the beginning. */
1954-
if (index == locked_folio->index)
1955-
continue;
1956-
folio = __filemap_get_folio(mapping, index, FGP_LOCK, GFP_NOFS);
1957-
/* Cache already dropped, no need to do any cleanup. */
1958-
if (IS_ERR(folio))
1959-
continue;
1960-
btrfs_folio_clamp_finish_io(fs_info, locked_folio, start, len);
1961-
folio_unlock(folio);
1962-
folio_put(folio);
1963-
}
1964-
mapping_set_error(mapping, error);
1965-
}
1966-
19671926
static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio,
19681927
struct extent_state **cached,
19691928
struct can_nocow_file_extent_args *nocow_args,
@@ -2013,7 +1972,7 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio
20131972
extent_clear_unlock_delalloc(inode, file_pos, end, locked_folio, cached,
20141973
EXTENT_LOCKED | EXTENT_DELALLOC |
20151974
EXTENT_CLEAR_DATA_RESV,
2016-
PAGE_UNLOCK | PAGE_SET_ORDERED);
1975+
PAGE_SET_ORDERED);
20171976
return ret;
20181977
error:
20191978
btrfs_cleanup_ordered_extents(inode, file_pos, len);
@@ -2247,6 +2206,14 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
22472206
cow_start = (u64)-1;
22482207
}
22492208

2209+
/*
2210+
* Everything is finished without an error, can unlock the folios now.
2211+
*
2212+
* No need to touch the io tree range nor set folio ordered flag, as
2213+
* fallback_to_cow() and nocow_one_range() have already handled them.
2214+
*/
2215+
extent_clear_unlock_delalloc(inode, start, end, locked_folio, NULL, 0, PAGE_UNLOCK);
2216+
22502217
btrfs_free_path(path);
22512218
return 0;
22522219

@@ -2305,9 +2272,13 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
23052272
}
23062273

23072274
if (oe_cleanup_len) {
2275+
const u64 oe_cleanup_end = oe_cleanup_start + oe_cleanup_len - 1;
23082276
btrfs_cleanup_ordered_extents(inode, oe_cleanup_start, oe_cleanup_len);
2309-
cleanup_dirty_folios(inode, locked_folio, oe_cleanup_start,
2310-
oe_cleanup_start + oe_cleanup_len - 1, ret);
2277+
extent_clear_unlock_delalloc(inode, oe_cleanup_start, oe_cleanup_end,
2278+
locked_folio, NULL,
2279+
EXTENT_LOCKED | EXTENT_DELALLOC,
2280+
PAGE_UNLOCK | PAGE_START_WRITEBACK |
2281+
PAGE_END_WRITEBACK);
23112282
}
23122283

23132284
if (untouched_len) {

0 commit comments

Comments
 (0)