Skip to content

Commit cb85f4d

Browse files
ebiggers authored and tytso committed
ext4: fix race between writepages and enabling EXT4_EXTENTS_FL
If EXT4_EXTENTS_FL is set on an inode while ext4_writepages() is running on it, the following warning in ext4_add_complete_io() can be hit:

    WARNING: CPU: 1 PID: 0 at fs/ext4/page-io.c:234 ext4_put_io_end_defer+0xf0/0x120

Here's a minimal reproducer (not 100% reliable) (root isn't required):

    while true; do
        sync
    done &
    while true; do
        rm -f file
        touch file
        chattr -e file
        echo X >> file
        chattr +e file
    done

The problem is that in ext4_writepages(), ext4_should_dioread_nolock() (which only returns true on extent-based files) is checked once to set the number of reserved journal credits, and also again later to select the flags for ext4_map_blocks() and copy the reserved journal handle to ext4_io_end::handle. But if EXT4_EXTENTS_FL is being concurrently set, the first check can see dioread_nolock disabled while the later one can see it enabled, causing the reserved handle to unexpectedly be NULL.

Since changing EXT4_EXTENTS_FL is uncommon, and there may be other races related to doing so as well, fix this by synchronizing changing EXT4_EXTENTS_FL with ext4_writepages() via the existing s_writepages_rwsem (previously called s_journal_flag_rwsem).

This was originally reported by syzbot without a reproducer at https://syzkaller.appspot.com/bug?extid=2202a584a00fffd19fbf, but now that dioread_nolock is the default I also started seeing this when running syzkaller locally.

Link: https://lore.kernel.org/r/[email protected]
Reported-by: [email protected]
Fixes: 6b523df ("ext4: use transaction reservation for extent conversion in ext4_end_io")
Signed-off-by: Eric Biggers <[email protected]>
Signed-off-by: Theodore Ts'o <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Cc: [email protected]
1 parent bbd5593 commit cb85f4d

File tree

2 files changed

+23
-9
lines changed

2 files changed

+23
-9
lines changed

fs/ext4/ext4.h

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1552,7 +1552,10 @@ struct ext4_sb_info {
15521552
struct ratelimit_state s_warning_ratelimit_state;
15531553
struct ratelimit_state s_msg_ratelimit_state;
15541554

1555-
/* Barrier between changing inodes' journal flags and writepages ops. */
1555+
/*
1556+
* Barrier between writepages ops and changing any inode's JOURNAL_DATA
1557+
* or EXTENTS flag.
1558+
*/
15561559
struct percpu_rw_semaphore s_writepages_rwsem;
15571560
struct dax_device *s_daxdev;
15581561
#ifdef CONFIG_EXT4_DEBUG

fs/ext4/migrate.c

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -407,6 +407,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
407407

408408
int ext4_ext_migrate(struct inode *inode)
409409
{
410+
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
410411
handle_t *handle;
411412
int retval = 0, i;
412413
__le32 *i_data;
@@ -431,6 +432,8 @@ int ext4_ext_migrate(struct inode *inode)
431432
*/
432433
return retval;
433434

435+
percpu_down_write(&sbi->s_writepages_rwsem);
436+
434437
/*
435438
* Worst case we can touch the allocation bitmaps, a bgd
436439
* block, and a block to link in the orphan list. We do need
@@ -441,7 +444,7 @@ int ext4_ext_migrate(struct inode *inode)
441444

442445
if (IS_ERR(handle)) {
443446
retval = PTR_ERR(handle);
444-
return retval;
447+
goto out_unlock;
445448
}
446449
goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
447450
EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
@@ -452,7 +455,7 @@ int ext4_ext_migrate(struct inode *inode)
452455
if (IS_ERR(tmp_inode)) {
453456
retval = PTR_ERR(tmp_inode);
454457
ext4_journal_stop(handle);
455-
return retval;
458+
goto out_unlock;
456459
}
457460
i_size_write(tmp_inode, i_size_read(inode));
458461
/*
@@ -494,7 +497,7 @@ int ext4_ext_migrate(struct inode *inode)
494497
*/
495498
ext4_orphan_del(NULL, tmp_inode);
496499
retval = PTR_ERR(handle);
497-
goto out;
500+
goto out_tmp_inode;
498501
}
499502

500503
ei = EXT4_I(inode);
@@ -576,10 +579,11 @@ int ext4_ext_migrate(struct inode *inode)
576579
ext4_ext_tree_init(handle, tmp_inode);
577580
out_stop:
578581
ext4_journal_stop(handle);
579-
out:
582+
out_tmp_inode:
580583
unlock_new_inode(tmp_inode);
581584
iput(tmp_inode);
582-
585+
out_unlock:
586+
percpu_up_write(&sbi->s_writepages_rwsem);
583587
return retval;
584588
}
585589

@@ -589,7 +593,8 @@ int ext4_ext_migrate(struct inode *inode)
589593
int ext4_ind_migrate(struct inode *inode)
590594
{
591595
struct ext4_extent_header *eh;
592-
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
596+
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
597+
struct ext4_super_block *es = sbi->s_es;
593598
struct ext4_inode_info *ei = EXT4_I(inode);
594599
struct ext4_extent *ex;
595600
unsigned int i, len;
@@ -613,9 +618,13 @@ int ext4_ind_migrate(struct inode *inode)
613618
if (test_opt(inode->i_sb, DELALLOC))
614619
ext4_alloc_da_blocks(inode);
615620

621+
percpu_down_write(&sbi->s_writepages_rwsem);
622+
616623
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
617-
if (IS_ERR(handle))
618-
return PTR_ERR(handle);
624+
if (IS_ERR(handle)) {
625+
ret = PTR_ERR(handle);
626+
goto out_unlock;
627+
}
619628

620629
down_write(&EXT4_I(inode)->i_data_sem);
621630
ret = ext4_ext_check_inode(inode);
@@ -650,5 +659,7 @@ int ext4_ind_migrate(struct inode *inode)
650659
errout:
651660
ext4_journal_stop(handle);
652661
up_write(&EXT4_I(inode)->i_data_sem);
662+
out_unlock:
663+
percpu_up_write(&sbi->s_writepages_rwsem);
653664
return ret;
654665
}

0 commit comments

Comments
 (0)