Skip to content

Commit a3163ca

Browse files
committed
Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 fixes from Ted Ts'o:
"More miscellaneous ext4 bug fixes (all stable fodder)"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  - ext4: fix mount failure with quota configured as module
  - jbd2: fix ocfs2 corrupt when clearing block group bits
  - ext4: fix race between writepages and enabling EXT4_EXTENTS_FL
  - ext4: rename s_journal_flag_rwsem to s_writepages_rwsem
  - ext4: fix potential race between s_flex_groups online resizing and access
  - ext4: fix potential race between s_group_info online resizing and access
  - ext4: fix potential race between online resizing and write operations
  - ext4: add cond_resched() to __ext4_find_entry()
  - ext4: fix a data race in EXT4_I(inode)->i_disksize
2 parents c6188df + 9db176b commit a3163ca

File tree

10 files changed

+256
-108
lines changed

10 files changed

+256
-108
lines changed

fs/ext4/balloc.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
270270
ext4_group_t ngroups = ext4_get_groups_count(sb);
271271
struct ext4_group_desc *desc;
272272
struct ext4_sb_info *sbi = EXT4_SB(sb);
273+
struct buffer_head *bh_p;
273274

274275
if (block_group >= ngroups) {
275276
ext4_error(sb, "block_group >= groups_count - block_group = %u,"
@@ -280,18 +281,25 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
280281

281282
group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
282283
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
283-
if (!sbi->s_group_desc[group_desc]) {
284+
bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc);
285+
/*
286+
* sbi_array_rcu_deref returns with rcu unlocked, this is ok since
287+
* the pointer being dereferenced won't be dereferenced again. By
288+
* looking at the usage in add_new_gdb() the value isn't modified,
289+
* just the pointer, and so it remains valid.
290+
*/
291+
if (!bh_p) {
284292
ext4_error(sb, "Group descriptor not loaded - "
285293
"block_group = %u, group_desc = %u, desc = %u",
286294
block_group, group_desc, offset);
287295
return NULL;
288296
}
289297

290298
desc = (struct ext4_group_desc *)(
291-
(__u8 *)sbi->s_group_desc[group_desc]->b_data +
299+
(__u8 *)bh_p->b_data +
292300
offset * EXT4_DESC_SIZE(sb));
293301
if (bh)
294-
*bh = sbi->s_group_desc[group_desc];
302+
*bh = bh_p;
295303
return desc;
296304
}
297305

fs/ext4/ext4.h

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,7 +1400,7 @@ struct ext4_sb_info {
14001400
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
14011401
struct buffer_head * s_sbh; /* Buffer containing the super block */
14021402
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
1403-
struct buffer_head **s_group_desc;
1403+
struct buffer_head * __rcu *s_group_desc;
14041404
unsigned int s_mount_opt;
14051405
unsigned int s_mount_opt2;
14061406
unsigned int s_mount_flags;
@@ -1462,7 +1462,7 @@ struct ext4_sb_info {
14621462
#endif
14631463

14641464
/* for buddy allocator */
1465-
struct ext4_group_info ***s_group_info;
1465+
struct ext4_group_info ** __rcu *s_group_info;
14661466
struct inode *s_buddy_cache;
14671467
spinlock_t s_md_lock;
14681468
unsigned short *s_mb_offsets;
@@ -1512,7 +1512,7 @@ struct ext4_sb_info {
15121512
unsigned int s_extent_max_zeroout_kb;
15131513

15141514
unsigned int s_log_groups_per_flex;
1515-
struct flex_groups *s_flex_groups;
1515+
struct flex_groups * __rcu *s_flex_groups;
15161516
ext4_group_t s_flex_groups_allocated;
15171517

15181518
/* workqueue for reserved extent conversions (buffered io) */
@@ -1552,8 +1552,11 @@ struct ext4_sb_info {
15521552
struct ratelimit_state s_warning_ratelimit_state;
15531553
struct ratelimit_state s_msg_ratelimit_state;
15541554

1555-
/* Barrier between changing inodes' journal flags and writepages ops. */
1556-
struct percpu_rw_semaphore s_journal_flag_rwsem;
1555+
/*
1556+
* Barrier between writepages ops and changing any inode's JOURNAL_DATA
1557+
* or EXTENTS flag.
1558+
*/
1559+
struct percpu_rw_semaphore s_writepages_rwsem;
15571560
struct dax_device *s_daxdev;
15581561
#ifdef CONFIG_EXT4_DEBUG
15591562
unsigned long s_simulate_fail;
@@ -1576,6 +1579,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
15761579
ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
15771580
}
15781581

1582+
/*
1583+
* Returns: sbi->field[index]
1584+
* Used to access an array element from the following sbi fields which require
1585+
* rcu protection to avoid dereferencing an invalid pointer due to reassignment
1586+
* - s_group_desc
1587+
* - s_group_info
1588+
* - s_flex_groups
1589+
*/
1590+
#define sbi_array_rcu_deref(sbi, field, index) \
1591+
({ \
1592+
typeof(*((sbi)->field)) _v; \
1593+
rcu_read_lock(); \
1594+
_v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \
1595+
rcu_read_unlock(); \
1596+
_v; \
1597+
})
1598+
15791599
/*
15801600
* Simulate_fail codes
15811601
*/
@@ -2730,6 +2750,7 @@ extern int ext4_generic_delete_entry(handle_t *handle,
27302750
extern bool ext4_empty_dir(struct inode *inode);
27312751

27322752
/* resize.c */
2753+
extern void ext4_kvfree_array_rcu(void *to_free);
27332754
extern int ext4_group_add(struct super_block *sb,
27342755
struct ext4_new_group_data *input);
27352756
extern int ext4_group_extend(struct super_block *sb,
@@ -2976,13 +2997,13 @@ static inline
29762997
struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
29772998
ext4_group_t group)
29782999
{
2979-
struct ext4_group_info ***grp_info;
3000+
struct ext4_group_info **grp_info;
29803001
long indexv, indexh;
29813002
BUG_ON(group >= EXT4_SB(sb)->s_groups_count);
2982-
grp_info = EXT4_SB(sb)->s_group_info;
29833003
indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
29843004
indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
2985-
return grp_info[indexv][indexh];
3005+
grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
3006+
return grp_info[indexh];
29863007
}
29873008

29883009
/*
@@ -3032,7 +3053,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
30323053
!inode_is_locked(inode));
30333054
down_write(&EXT4_I(inode)->i_data_sem);
30343055
if (newsize > EXT4_I(inode)->i_disksize)
3035-
EXT4_I(inode)->i_disksize = newsize;
3056+
WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize);
30363057
up_write(&EXT4_I(inode)->i_data_sem);
30373058
}
30383059

fs/ext4/ialloc.c

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -328,11 +328,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
328328

329329
percpu_counter_inc(&sbi->s_freeinodes_counter);
330330
if (sbi->s_log_groups_per_flex) {
331-
ext4_group_t f = ext4_flex_group(sbi, block_group);
331+
struct flex_groups *fg;
332332

333-
atomic_inc(&sbi->s_flex_groups[f].free_inodes);
333+
fg = sbi_array_rcu_deref(sbi, s_flex_groups,
334+
ext4_flex_group(sbi, block_group));
335+
atomic_inc(&fg->free_inodes);
334336
if (is_directory)
335-
atomic_dec(&sbi->s_flex_groups[f].used_dirs);
337+
atomic_dec(&fg->used_dirs);
336338
}
337339
BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
338340
fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
@@ -368,12 +370,13 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
368370
int flex_size, struct orlov_stats *stats)
369371
{
370372
struct ext4_group_desc *desc;
371-
struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
372373

373374
if (flex_size > 1) {
374-
stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
375-
stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
376-
stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
375+
struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb),
376+
s_flex_groups, g);
377+
stats->free_inodes = atomic_read(&fg->free_inodes);
378+
stats->free_clusters = atomic64_read(&fg->free_clusters);
379+
stats->used_dirs = atomic_read(&fg->used_dirs);
377380
return;
378381
}
379382

@@ -1054,7 +1057,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
10541057
if (sbi->s_log_groups_per_flex) {
10551058
ext4_group_t f = ext4_flex_group(sbi, group);
10561059

1057-
atomic_inc(&sbi->s_flex_groups[f].used_dirs);
1060+
atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups,
1061+
f)->used_dirs);
10581062
}
10591063
}
10601064
if (ext4_has_group_desc_csum(sb)) {
@@ -1077,7 +1081,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
10771081

10781082
if (sbi->s_log_groups_per_flex) {
10791083
flex_group = ext4_flex_group(sbi, group);
1080-
atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
1084+
atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups,
1085+
flex_group)->free_inodes);
10811086
}
10821087

10831088
inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);

fs/ext4/inode.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2465,7 +2465,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
24652465
* truncate are avoided by checking i_size under i_data_sem.
24662466
*/
24672467
disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
2468-
if (disksize > EXT4_I(inode)->i_disksize) {
2468+
if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
24692469
int err2;
24702470
loff_t i_size;
24712471

@@ -2628,7 +2628,7 @@ static int ext4_writepages(struct address_space *mapping,
26282628
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
26292629
return -EIO;
26302630

2631-
percpu_down_read(&sbi->s_journal_flag_rwsem);
2631+
percpu_down_read(&sbi->s_writepages_rwsem);
26322632
trace_ext4_writepages(inode, wbc);
26332633

26342634
/*
@@ -2849,7 +2849,7 @@ static int ext4_writepages(struct address_space *mapping,
28492849
out_writepages:
28502850
trace_ext4_writepages_result(inode, wbc, ret,
28512851
nr_to_write - wbc->nr_to_write);
2852-
percpu_up_read(&sbi->s_journal_flag_rwsem);
2852+
percpu_up_read(&sbi->s_writepages_rwsem);
28532853
return ret;
28542854
}
28552855

@@ -2864,13 +2864,13 @@ static int ext4_dax_writepages(struct address_space *mapping,
28642864
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
28652865
return -EIO;
28662866

2867-
percpu_down_read(&sbi->s_journal_flag_rwsem);
2867+
percpu_down_read(&sbi->s_writepages_rwsem);
28682868
trace_ext4_writepages(inode, wbc);
28692869

28702870
ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
28712871
trace_ext4_writepages_result(inode, wbc, ret,
28722872
nr_to_write - wbc->nr_to_write);
2873-
percpu_up_read(&sbi->s_journal_flag_rwsem);
2873+
percpu_up_read(&sbi->s_writepages_rwsem);
28742874
return ret;
28752875
}
28762876

@@ -5861,7 +5861,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
58615861
}
58625862
}
58635863

5864-
percpu_down_write(&sbi->s_journal_flag_rwsem);
5864+
percpu_down_write(&sbi->s_writepages_rwsem);
58655865
jbd2_journal_lock_updates(journal);
58665866

58675867
/*
@@ -5878,15 +5878,15 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
58785878
err = jbd2_journal_flush(journal);
58795879
if (err < 0) {
58805880
jbd2_journal_unlock_updates(journal);
5881-
percpu_up_write(&sbi->s_journal_flag_rwsem);
5881+
percpu_up_write(&sbi->s_writepages_rwsem);
58825882
return err;
58835883
}
58845884
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
58855885
}
58865886
ext4_set_aops(inode);
58875887

58885888
jbd2_journal_unlock_updates(journal);
5889-
percpu_up_write(&sbi->s_journal_flag_rwsem);
5889+
percpu_up_write(&sbi->s_writepages_rwsem);
58905890

58915891
if (val)
58925892
up_write(&EXT4_I(inode)->i_mmap_sem);

0 commit comments

Comments
 (0)