Skip to content

Commit 1d0c392

Browse files
committed
ext4: fix potential race between online resizing and write operations
During an online resize, an array of pointers to buffer heads gets replaced so that it can be enlarged. If a racing block allocation or deallocation is still using the old array, and the old array has been freed and reused, this can lead to a GPF or to some other random kernel memory getting modified.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443
Link: https://lore.kernel.org/r/[email protected]
Reported-by: Suraj Jitindar Singh <[email protected]>
Signed-off-by: Theodore Ts'o <[email protected]>
Cc: [email protected]
1 parent 9424ef5 commit 1d0c392

File tree

4 files changed

+97
-25
lines changed

4 files changed

+97
-25
lines changed

fs/ext4/balloc.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
270270
ext4_group_t ngroups = ext4_get_groups_count(sb);
271271
struct ext4_group_desc *desc;
272272
struct ext4_sb_info *sbi = EXT4_SB(sb);
273+
struct buffer_head *bh_p;
273274

274275
if (block_group >= ngroups) {
275276
ext4_error(sb, "block_group >= groups_count - block_group = %u,"
@@ -280,18 +281,25 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
280281

281282
group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
282283
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
283-
if (!sbi->s_group_desc[group_desc]) {
284+
bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc);
285+
/*
286+
* sbi_array_rcu_deref() returns with the RCU read lock already dropped.
287+
* This is OK because the array pointer it dereferenced is not used
288+
* again here, and add_new_gdb() only replaces the array pointer, never
289+
* the buffer_head pointers stored in it, so bh_p remains valid.
290+
*/
291+
if (!bh_p) {
284292
ext4_error(sb, "Group descriptor not loaded - "
285293
"block_group = %u, group_desc = %u, desc = %u",
286294
block_group, group_desc, offset);
287295
return NULL;
288296
}
289297

290298
desc = (struct ext4_group_desc *)(
291-
(__u8 *)sbi->s_group_desc[group_desc]->b_data +
299+
(__u8 *)bh_p->b_data +
292300
offset * EXT4_DESC_SIZE(sb));
293301
if (bh)
294-
*bh = sbi->s_group_desc[group_desc];
302+
*bh = bh_p;
295303
return desc;
296304
}
297305

fs/ext4/ext4.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1400,7 +1400,7 @@ struct ext4_sb_info {
14001400
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
14011401
struct buffer_head * s_sbh; /* Buffer containing the super block */
14021402
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
1403-
struct buffer_head **s_group_desc;
1403+
struct buffer_head * __rcu *s_group_desc;
14041404
unsigned int s_mount_opt;
14051405
unsigned int s_mount_opt2;
14061406
unsigned int s_mount_flags;
@@ -1576,6 +1576,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
15761576
ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
15771577
}
15781578

1579+
/*
1580+
* Returns: sbi->field[index]
1581+
* Used to access an array element from the following sbi fields which require
1582+
* rcu protection to avoid dereferencing an invalid pointer due to reassignment
1583+
* - s_group_desc
1584+
* - s_group_info
1585+
* - s_flex_group
*
* The macro takes rcu_read_lock() itself, copies the selected element into
* a local and calls rcu_read_unlock() before yielding that copy, so the
* caller only ever receives the element value, never the array pointer.
1586+
*/
1587+
#define sbi_array_rcu_deref(sbi, field, index) \
1588+
({ \
1589+
typeof(*((sbi)->field)) _v; \
1590+
rcu_read_lock(); \
1591+
_v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \
1592+
rcu_read_unlock(); \
1593+
_v; \
1594+
})
1595+
15791596
/*
15801597
* Simulate_fail codes
15811598
*/
@@ -2730,6 +2747,7 @@ extern int ext4_generic_delete_entry(handle_t *handle,
27302747
extern bool ext4_empty_dir(struct inode *inode);
27312748

27322749
/* resize.c */
2750+
extern void ext4_kvfree_array_rcu(void *to_free);
27332751
extern int ext4_group_add(struct super_block *sb,
27342752
struct ext4_new_group_data *input);
27352753
extern int ext4_group_extend(struct super_block *sb,

fs/ext4/resize.c

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,33 @@
1717

1818
#include "ext4_jbd2.h"
1919

20+
/*
 * Small carrier that pairs an rcu_head with an arbitrary pointer so that
 * the pointer can be released from an RCU callback.
 */
struct ext4_rcu_ptr {
21+
struct rcu_head rcu;	/* passed to call_rcu(); used to recover this struct */
22+
void *ptr;	/* memory that the callback releases with kvfree() */
23+
};
24+
25+
/*
 * RCU callback registered by ext4_kvfree_array_rcu().
 *
 * @head: the rcu_head embedded in a struct ext4_rcu_ptr.
 *
 * Recovers the enclosing ext4_rcu_ptr, releases the memory it carries with
 * kvfree() and then frees the carrier itself with kfree().
 */
static void ext4_rcu_ptr_callback(struct rcu_head *head)
26+
{
27+
struct ext4_rcu_ptr *ptr;
28+
29+
ptr = container_of(head, struct ext4_rcu_ptr, rcu);
30+
kvfree(ptr->ptr);
31+
kfree(ptr);
32+
}
33+
34+
/*
 * Release @to_free with kvfree(), deferred through RCU.
 *
 * @to_free: memory to release; it is eventually passed to kvfree().
 *
 * A struct ext4_rcu_ptr carrier is allocated and queued with call_rcu(),
 * so the actual kvfree() happens in ext4_rcu_ptr_callback(). If the carrier
 * cannot be allocated, the function falls back to synchronize_rcu()
 * followed by an immediate kvfree().
 *
 * NOTE(review): the GFP_KERNEL allocation and synchronize_rcu() imply the
 * caller must be in a context that is allowed to sleep - confirm at call
 * sites.
 */
void ext4_kvfree_array_rcu(void *to_free)
35+
{
36+
struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
37+
38+
if (ptr) {
39+
ptr->ptr = to_free;
40+
call_rcu(&ptr->rcu, ext4_rcu_ptr_callback);
41+
return;
42+
}
/* No carrier available: wait for readers here, then free directly. */
43+
synchronize_rcu();
44+
kvfree(to_free);
45+
}
46+
2047
int ext4_resize_begin(struct super_block *sb)
2148
{
2249
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -542,8 +569,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
542569
brelse(gdb);
543570
goto out;
544571
}
545-
memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
546-
gdb->b_size);
572+
memcpy(gdb->b_data, sbi_array_rcu_deref(sbi,
573+
s_group_desc, j)->b_data, gdb->b_size);
547574
set_buffer_uptodate(gdb);
548575

549576
err = ext4_handle_dirty_metadata(handle, NULL, gdb);
@@ -860,13 +887,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
860887
}
861888
brelse(dind);
862889

863-
o_group_desc = EXT4_SB(sb)->s_group_desc;
890+
rcu_read_lock();
891+
o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
864892
memcpy(n_group_desc, o_group_desc,
865893
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
894+
rcu_read_unlock();
866895
n_group_desc[gdb_num] = gdb_bh;
867-
EXT4_SB(sb)->s_group_desc = n_group_desc;
896+
rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
868897
EXT4_SB(sb)->s_gdb_count++;
869-
kvfree(o_group_desc);
898+
ext4_kvfree_array_rcu(o_group_desc);
870899

871900
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
872901
err = ext4_handle_dirty_super(handle, sb);
@@ -909,9 +938,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
909938
return err;
910939
}
911940

912-
o_group_desc = EXT4_SB(sb)->s_group_desc;
941+
rcu_read_lock();
942+
o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
913943
memcpy(n_group_desc, o_group_desc,
914944
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
945+
rcu_read_unlock();
915946
n_group_desc[gdb_num] = gdb_bh;
916947

917948
BUFFER_TRACE(gdb_bh, "get_write_access");
@@ -922,9 +953,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
922953
return err;
923954
}
924955

925-
EXT4_SB(sb)->s_group_desc = n_group_desc;
956+
rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
926957
EXT4_SB(sb)->s_gdb_count++;
927-
kvfree(o_group_desc);
958+
ext4_kvfree_array_rcu(o_group_desc);
928959
return err;
929960
}
930961

@@ -1188,7 +1219,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
11881219
* use non-sparse filesystems anymore. This is already checked above.
11891220
*/
11901221
if (gdb_off) {
1191-
gdb_bh = sbi->s_group_desc[gdb_num];
1222+
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
1223+
gdb_num);
11921224
BUFFER_TRACE(gdb_bh, "get_write_access");
11931225
err = ext4_journal_get_write_access(handle, gdb_bh);
11941226

@@ -1270,7 +1302,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
12701302
/*
12711303
* get_write_access() has been called on gdb_bh by ext4_add_new_desc().
12721304
*/
1273-
gdb_bh = sbi->s_group_desc[gdb_num];
1305+
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num);
12741306
/* Update group descriptor block for new group */
12751307
gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
12761308
gdb_off * EXT4_DESC_SIZE(sb));
@@ -1497,7 +1529,8 @@ static int ext4_flex_group_add(struct super_block *sb,
14971529
for (; gdb_num <= gdb_num_end; gdb_num++) {
14981530
struct buffer_head *gdb_bh;
14991531

1500-
gdb_bh = sbi->s_group_desc[gdb_num];
1532+
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
1533+
gdb_num);
15011534
if (old_gdb == gdb_bh->b_blocknr)
15021535
continue;
15031536
update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,

fs/ext4/super.c

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,7 @@ static void ext4_put_super(struct super_block *sb)
10141014
{
10151015
struct ext4_sb_info *sbi = EXT4_SB(sb);
10161016
struct ext4_super_block *es = sbi->s_es;
1017+
struct buffer_head **group_desc;
10171018
int aborted = 0;
10181019
int i, err;
10191020

@@ -1046,9 +1047,12 @@ static void ext4_put_super(struct super_block *sb)
10461047
if (!sb_rdonly(sb))
10471048
ext4_commit_super(sb, 1);
10481049

1050+
rcu_read_lock();
1051+
group_desc = rcu_dereference(sbi->s_group_desc);
10491052
for (i = 0; i < sbi->s_gdb_count; i++)
1050-
brelse(sbi->s_group_desc[i]);
1051-
kvfree(sbi->s_group_desc);
1053+
brelse(group_desc[i]);
1054+
kvfree(group_desc);
1055+
rcu_read_unlock();
10521056
kvfree(sbi->s_flex_groups);
10531057
percpu_counter_destroy(&sbi->s_freeclusters_counter);
10541058
percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@ -3634,7 +3638,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
36343638
{
36353639
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
36363640
char *orig_data = kstrdup(data, GFP_KERNEL);
3637-
struct buffer_head *bh;
3641+
struct buffer_head *bh, **group_desc;
36383642
struct ext4_super_block *es = NULL;
36393643
struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
36403644
ext4_fsblk_t block;
@@ -4290,9 +4294,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
42904294
goto failed_mount;
42914295
}
42924296
}
4293-
sbi->s_group_desc = kvmalloc_array(db_count,
4294-
sizeof(struct buffer_head *),
4295-
GFP_KERNEL);
4297+
rcu_assign_pointer(sbi->s_group_desc,
4298+
kvmalloc_array(db_count,
4299+
sizeof(struct buffer_head *),
4300+
GFP_KERNEL));
42964301
if (sbi->s_group_desc == NULL) {
42974302
ext4_msg(sb, KERN_ERR, "not enough memory");
42984303
ret = -ENOMEM;
@@ -4308,14 +4313,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
43084313
}
43094314

43104315
for (i = 0; i < db_count; i++) {
4316+
struct buffer_head *bh;
4317+
43114318
block = descriptor_loc(sb, logical_sb_block, i);
4312-
sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
4313-
if (!sbi->s_group_desc[i]) {
4319+
bh = sb_bread_unmovable(sb, block);
4320+
if (!bh) {
43144321
ext4_msg(sb, KERN_ERR,
43154322
"can't read group descriptor %d", i);
43164323
db_count = i;
43174324
goto failed_mount2;
43184325
}
4326+
rcu_read_lock();
4327+
rcu_dereference(sbi->s_group_desc)[i] = bh;
4328+
rcu_read_unlock();
43194329
}
43204330
sbi->s_gdb_count = db_count;
43214331
if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
@@ -4717,9 +4727,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
47174727
if (sbi->s_mmp_tsk)
47184728
kthread_stop(sbi->s_mmp_tsk);
47194729
failed_mount2:
4730+
rcu_read_lock();
4731+
group_desc = rcu_dereference(sbi->s_group_desc);
47204732
for (i = 0; i < db_count; i++)
4721-
brelse(sbi->s_group_desc[i]);
4722-
kvfree(sbi->s_group_desc);
4733+
brelse(group_desc[i]);
4734+
kvfree(group_desc);
4735+
rcu_read_unlock();
47234736
failed_mount:
47244737
if (sbi->s_chksum_driver)
47254738
crypto_free_shash(sbi->s_chksum_driver);

0 commit comments

Comments
 (0)