Skip to content

Commit 65f8b80

Browse files
jankara authored and tytso committed
ext4: fix race when reusing xattr blocks
When ext4_xattr_block_set() decides to remove xattr block the following
race can happen:

CPU1                                    CPU2
ext4_xattr_block_set()                  ext4_xattr_release_block()
  new_bh = ext4_xattr_block_cache_find()

                                          lock_buffer(bh);
                                          ref = le32_to_cpu(BHDR(bh)->h_refcount);
                                          if (ref == 1) {
                                            ...
                                            mb_cache_entry_delete();
                                            unlock_buffer(bh);
                                            ext4_free_blocks();
                                              ...
                                              ext4_forget(..., bh, ...);
                                                jbd2_journal_revoke(..., bh);

  ext4_journal_get_write_access(..., new_bh, ...)
    do_get_write_access()
      jbd2_journal_cancel_revoke(..., new_bh);

Later the code in ext4_xattr_block_set() finds out the block got freed
and cancels reusal of the block but the revoke stays canceled and so in
case of block reuse and journal replay the filesystem can get corrupted.
If the race works out slightly differently, we can also hit assertions
in the jbd2 code.

Fix the problem by making sure that once matching mbcache entry is
found, code dropping the last xattr block reference (or trying to modify
xattr block in place) waits until the mbcache entry reference is
dropped. This way code trying to reuse xattr block is protected from
someone trying to drop the last reference to xattr block.

Reported-and-tested-by: Ritesh Harjani <[email protected]>
CC: [email protected]
Fixes: 82939d7 ("ext4: convert to mbcache2")
Signed-off-by: Jan Kara <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Theodore Ts'o <[email protected]>
1 parent fd48e9a commit 65f8b80

File tree

1 file changed

+45
-22
lines changed

1 file changed

+45
-22
lines changed

fs/ext4/xattr.c

Lines changed: 45 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -439,9 +439,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
439439
/* Remove entry from mbcache when EA inode is getting evicted */
440440
void ext4_evict_ea_inode(struct inode *inode)
441441
{
442-
if (EA_INODE_CACHE(inode))
443-
mb_cache_entry_delete(EA_INODE_CACHE(inode),
444-
ext4_xattr_inode_get_hash(inode), inode->i_ino);
442+
struct mb_cache_entry *oe;
443+
444+
if (!EA_INODE_CACHE(inode))
445+
return;
446+
/* Wait for entry to get unused so that we can remove it */
447+
while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
448+
ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
449+
mb_cache_entry_wait_unused(oe);
450+
mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
451+
}
445452
}
446453

447454
static int
@@ -1229,6 +1236,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
12291236
if (error)
12301237
goto out;
12311238

1239+
retry_ref:
12321240
lock_buffer(bh);
12331241
hash = le32_to_cpu(BHDR(bh)->h_hash);
12341242
ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1238,9 +1246,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
12381246
* This must happen under buffer lock for
12391247
* ext4_xattr_block_set() to reliably detect freed block
12401248
*/
1241-
if (ea_block_cache)
1242-
mb_cache_entry_delete(ea_block_cache, hash,
1243-
bh->b_blocknr);
1249+
if (ea_block_cache) {
1250+
struct mb_cache_entry *oe;
1251+
1252+
oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
1253+
bh->b_blocknr);
1254+
if (oe) {
1255+
unlock_buffer(bh);
1256+
mb_cache_entry_wait_unused(oe);
1257+
mb_cache_entry_put(ea_block_cache, oe);
1258+
goto retry_ref;
1259+
}
1260+
}
12441261
get_bh(bh);
12451262
unlock_buffer(bh);
12461263

@@ -1867,9 +1884,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
18671884
* ext4_xattr_block_set() to reliably detect modified
18681885
* block
18691886
*/
1870-
if (ea_block_cache)
1871-
mb_cache_entry_delete(ea_block_cache, hash,
1872-
bs->bh->b_blocknr);
1887+
if (ea_block_cache) {
1888+
struct mb_cache_entry *oe;
1889+
1890+
oe = mb_cache_entry_delete_or_get(ea_block_cache,
1891+
hash, bs->bh->b_blocknr);
1892+
if (oe) {
1893+
/*
1894+
* Xattr block is getting reused. Leave
1895+
* it alone.
1896+
*/
1897+
mb_cache_entry_put(ea_block_cache, oe);
1898+
goto clone_block;
1899+
}
1900+
}
18731901
ea_bdebug(bs->bh, "modifying in-place");
18741902
error = ext4_xattr_set_entry(i, s, handle, inode,
18751903
true /* is_block */);
@@ -1885,6 +1913,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
18851913
goto cleanup;
18861914
goto inserted;
18871915
}
1916+
clone_block:
18881917
unlock_buffer(bs->bh);
18891918
ea_bdebug(bs->bh, "cloning");
18901919
s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
@@ -1990,18 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
19902019
lock_buffer(new_bh);
19912020
/*
19922021
* We have to be careful about races with
1993-
* freeing, rehashing or adding references to
1994-
* xattr block. Once we hold buffer lock xattr
1995-
* block's state is stable so we can check
1996-
* whether the block got freed / rehashed or
1997-
* not. Since we unhash mbcache entry under
1998-
* buffer lock when freeing / rehashing xattr
1999-
* block, checking whether entry is still
2000-
* hashed is reliable. Same rules hold for
2001-
* e_reusable handling.
2022+
* adding references to xattr block. Once we
2023+
* hold buffer lock xattr block's state is
2024+
* stable so we can check the additional
2025+
* reference fits.
20022026
*/
2003-
if (hlist_bl_unhashed(&ce->e_hash_list) ||
2004-
!ce->e_reusable) {
2027+
ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
2028+
if (ref > EXT4_XATTR_REFCOUNT_MAX) {
20052029
/*
20062030
* Undo everything and check mbcache
20072031
* again.
@@ -2016,9 +2040,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
20162040
new_bh = NULL;
20172041
goto inserted;
20182042
}
2019-
ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
20202043
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
2021-
if (ref >= EXT4_XATTR_REFCOUNT_MAX)
2044+
if (ref == EXT4_XATTR_REFCOUNT_MAX)
20222045
ce->e_reusable = 0;
20232046
ea_bdebug(new_bh, "reusing; refcount now=%d",
20242047
ref);

0 commit comments

Comments
 (0)