Skip to content

Commit 00d873c

Browse files
jankara authored and tytso committed
ext4: avoid deadlock in fs reclaim with page writeback
Ext4 has a filesystem wide lock protecting ext4_writepages() calls to
avoid races with switching of journalled data flag or inode format. This
lock can however cause a deadlock like:

CPU0                            CPU1

ext4_writepages()
  percpu_down_read(sbi->s_writepages_rwsem);
                                ext4_change_inode_journal_flag()
                                  percpu_down_write(sbi->s_writepages_rwsem);
                                    - blocks, all readers block from now on
  ext4_do_writepages()
    ext4_init_io_end()
      kmem_cache_zalloc(io_end_cachep, GFP_KERNEL)
        fs_reclaim frees dentry...
          dentry_unlink_inode()
            iput() - last ref =>
              iput_final() - inode dirty =>
                write_inode_now()...
                  ext4_writepages() tries to acquire sbi->s_writepages_rwsem
                    and blocks forever

Make sure we cannot recurse into filesystem reclaim from writeback code
to avoid the deadlock.

Reported-by: [email protected]
Link: https://lore.kernel.org/all/[email protected]
Fixes: c8585c6 ("ext4: fix races between changing inode journal mode and ext4_writepages")
CC: [email protected]
Signed-off-by: Jan Kara <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Theodore Ts'o <[email protected]>
1 parent b87c7cd commit 00d873c

File tree

3 files changed

+40
-13
lines changed

3 files changed

+40
-13
lines changed

fs/ext4/ext4.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,6 +1684,30 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
16841684
return container_of(inode, struct ext4_inode_info, vfs_inode);
16851685
}
16861686

1687+
/*
 * Take sbi->s_writepages_rwsem for reading and then call
 * memalloc_nofs_save(), so that (per the commit message) allocations made
 * while the lock is held cannot recurse into filesystem reclaim.
 *
 * Returns the value from memalloc_nofs_save(); the caller hands it back to
 * ext4_writepages_up_read() when releasing the lock.
 */
static inline int ext4_writepages_down_read(struct super_block *sb)
1688+
{
1689+
percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem);
1690+
return memalloc_nofs_save();
1691+
}
1692+
1693+
/*
 * Counterpart of ext4_writepages_down_read(): restore the allocation
 * context saved in @ctx via memalloc_nofs_restore(), then drop the read
 * side of sbi->s_writepages_rwsem. The restore happens before the unlock,
 * i.e. in the reverse order of acquisition.
 */
static inline void ext4_writepages_up_read(struct super_block *sb, int ctx)
1694+
{
1695+
memalloc_nofs_restore(ctx);
1696+
percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem);
1697+
}
1698+
1699+
/*
 * Take sbi->s_writepages_rwsem for writing and then call
 * memalloc_nofs_save(), mirroring ext4_writepages_down_read() for the
 * exclusive side of the lock.
 *
 * Returns the value from memalloc_nofs_save(); the caller hands it back to
 * ext4_writepages_up_write() when releasing the lock.
 */
static inline int ext4_writepages_down_write(struct super_block *sb)
1700+
{
1701+
percpu_down_write(&EXT4_SB(sb)->s_writepages_rwsem);
1702+
return memalloc_nofs_save();
1703+
}
1704+
1705+
/*
 * Counterpart of ext4_writepages_down_write(): restore the allocation
 * context saved in @ctx via memalloc_nofs_restore(), then drop the write
 * side of sbi->s_writepages_rwsem. The restore happens before the unlock,
 * i.e. in the reverse order of acquisition.
 */
static inline void ext4_writepages_up_write(struct super_block *sb, int ctx)
1706+
{
1707+
memalloc_nofs_restore(ctx);
1708+
percpu_up_write(&EXT4_SB(sb)->s_writepages_rwsem);
1709+
}
1710+
16871711
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
16881712
{
16891713
return ino == EXT4_ROOT_INO ||

fs/ext4/inode.c

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2783,11 +2783,12 @@ static int ext4_writepages(struct address_space *mapping,
27832783
.can_map = 1,
27842784
};
27852785
int ret;
2786+
int alloc_ctx;
27862787

27872788
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
27882789
return -EIO;
27892790

2790-
percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem);
2791+
alloc_ctx = ext4_writepages_down_read(sb);
27912792
ret = ext4_do_writepages(&mpd);
27922793
/*
27932794
* For data=journal writeback we could have come across pages marked
@@ -2796,7 +2797,7 @@ static int ext4_writepages(struct address_space *mapping,
27962797
*/
27972798
if (!ret && mpd.journalled_more_data)
27982799
ret = ext4_do_writepages(&mpd);
2799-
percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem);
2800+
ext4_writepages_up_read(sb, alloc_ctx);
28002801

28012802
return ret;
28022803
}
@@ -2824,17 +2825,18 @@ static int ext4_dax_writepages(struct address_space *mapping,
28242825
long nr_to_write = wbc->nr_to_write;
28252826
struct inode *inode = mapping->host;
28262827
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2828+
int alloc_ctx;
28272829

28282830
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
28292831
return -EIO;
28302832

2831-
percpu_down_read(&sbi->s_writepages_rwsem);
2833+
alloc_ctx = ext4_writepages_down_read(inode->i_sb);
28322834
trace_ext4_writepages(inode, wbc);
28332835

28342836
ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
28352837
trace_ext4_writepages_result(inode, wbc, ret,
28362838
nr_to_write - wbc->nr_to_write);
2837-
percpu_up_read(&sbi->s_writepages_rwsem);
2839+
ext4_writepages_up_read(inode->i_sb, alloc_ctx);
28382840
return ret;
28392841
}
28402842

@@ -5928,7 +5930,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
59285930
journal_t *journal;
59295931
handle_t *handle;
59305932
int err;
5931-
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5933+
int alloc_ctx;
59325934

59335935
/*
59345936
* We have to be very careful here: changing a data block's
@@ -5966,7 +5968,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
59665968
}
59675969
}
59685970

5969-
percpu_down_write(&sbi->s_writepages_rwsem);
5971+
alloc_ctx = ext4_writepages_down_write(inode->i_sb);
59705972
jbd2_journal_lock_updates(journal);
59715973

59725974
/*
@@ -5983,15 +5985,15 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
59835985
err = jbd2_journal_flush(journal, 0);
59845986
if (err < 0) {
59855987
jbd2_journal_unlock_updates(journal);
5986-
percpu_up_write(&sbi->s_writepages_rwsem);
5988+
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
59875989
return err;
59885990
}
59895991
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
59905992
}
59915993
ext4_set_aops(inode);
59925994

59935995
jbd2_journal_unlock_updates(journal);
5994-
percpu_up_write(&sbi->s_writepages_rwsem);
5996+
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
59955997

59965998
if (val)
59975999
filemap_invalidate_unlock(inode->i_mapping);

fs/ext4/migrate.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,6 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
408408

409409
int ext4_ext_migrate(struct inode *inode)
410410
{
411-
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
412411
handle_t *handle;
413412
int retval = 0, i;
414413
__le32 *i_data;
@@ -418,6 +417,7 @@ int ext4_ext_migrate(struct inode *inode)
418417
unsigned long max_entries;
419418
__u32 goal, tmp_csum_seed;
420419
uid_t owner[2];
420+
int alloc_ctx;
421421

422422
/*
423423
* If the filesystem does not support extents, or the inode
@@ -434,7 +434,7 @@ int ext4_ext_migrate(struct inode *inode)
434434
*/
435435
return retval;
436436

437-
percpu_down_write(&sbi->s_writepages_rwsem);
437+
alloc_ctx = ext4_writepages_down_write(inode->i_sb);
438438

439439
/*
440440
* Worst case we can touch the allocation bitmaps and a block
@@ -586,7 +586,7 @@ int ext4_ext_migrate(struct inode *inode)
586586
unlock_new_inode(tmp_inode);
587587
iput(tmp_inode);
588588
out_unlock:
589-
percpu_up_write(&sbi->s_writepages_rwsem);
589+
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
590590
return retval;
591591
}
592592

@@ -605,6 +605,7 @@ int ext4_ind_migrate(struct inode *inode)
605605
ext4_fsblk_t blk;
606606
handle_t *handle;
607607
int ret, ret2 = 0;
608+
int alloc_ctx;
608609

609610
if (!ext4_has_feature_extents(inode->i_sb) ||
610611
(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
@@ -621,7 +622,7 @@ int ext4_ind_migrate(struct inode *inode)
621622
if (test_opt(inode->i_sb, DELALLOC))
622623
ext4_alloc_da_blocks(inode);
623624

624-
percpu_down_write(&sbi->s_writepages_rwsem);
625+
alloc_ctx = ext4_writepages_down_write(inode->i_sb);
625626

626627
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
627628
if (IS_ERR(handle)) {
@@ -665,6 +666,6 @@ int ext4_ind_migrate(struct inode *inode)
665666
ext4_journal_stop(handle);
666667
up_write(&EXT4_I(inode)->i_data_sem);
667668
out_unlock:
668-
percpu_up_write(&sbi->s_writepages_rwsem);
669+
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
669670
return ret;
670671
}

0 commit comments

Comments
 (0)