Skip to content

Commit b6ce2db

Browse files
Brian Fostergregkh
authored andcommitted
ext4: partial zero eof block on unaligned inode size extension
[ Upstream commit c7fc036 ] Using mapped writes, it's technically possible to expose stale post-eof data on a truncate up operation. Consider the following example: $ xfs_io -fc "pwrite 0 2k" -c "mmap 0 4k" -c "mwrite 2k 2k" \ -c "truncate 8k" -c "pread -v 2k 16" <file> ... 00000800: 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 XXXXXXXXXXXXXXXX ... This shows that the post-eof data written via mwrite lands within EOF after a truncate up. While this is deliberate of the test case, behavior is somewhat unpredictable because writeback does post-eof zeroing, and writeback can occur at any time in the background. For example, an fsync inserted between the mwrite and truncate causes the subsequent read to instead return zeroes. This basically means that there is a race window in this situation between any subsequent extending operation and writeback that dictates whether post-eof data is exposed to the file or zeroed. To prevent this problem, perform partial block zeroing as part of the various inode size extending operations that are susceptible to it. For truncate extension, zero around the original eof similar to how truncate down does partial zeroing of the new eof. For extension via writes and fallocate related operations, zero the newly exposed range of the file to cover any partial zeroing that must occur at the original and new eof blocks. Signed-off-by: Brian Foster <[email protected]> Link: https://patch.msgid.link/[email protected] Signed-off-by: Theodore Ts'o <[email protected]> Signed-off-by: Sasha Levin <[email protected]>
1 parent 2e996ea commit b6ce2db

File tree

2 files changed

+42
-16
lines changed

2 files changed

+42
-16
lines changed

fs/ext4/extents.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4482,7 +4482,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
44824482
int depth = 0;
44834483
struct ext4_map_blocks map;
44844484
unsigned int credits;
4485-
loff_t epos;
4485+
loff_t epos, old_size = i_size_read(inode);
44864486

44874487
BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
44884488
map.m_lblk = offset;
@@ -4541,6 +4541,11 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
45414541
if (ext4_update_inode_size(inode, epos) & 0x1)
45424542
inode_set_mtime_to_ts(inode,
45434543
inode_get_ctime(inode));
4544+
if (epos > old_size) {
4545+
pagecache_isize_extended(inode, old_size, epos);
4546+
ext4_zero_partial_blocks(handle, inode,
4547+
old_size, epos - old_size);
4548+
}
45444549
}
45454550
ret2 = ext4_mark_inode_dirty(handle, inode);
45464551
ext4_update_inode_fsync_trans(handle, inode, 1);

fs/ext4/inode.c

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,8 +1307,10 @@ static int ext4_write_end(struct file *file,
13071307
folio_unlock(folio);
13081308
folio_put(folio);
13091309

1310-
if (old_size < pos && !verity)
1310+
if (old_size < pos && !verity) {
13111311
pagecache_isize_extended(inode, old_size, pos);
1312+
ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size);
1313+
}
13121314
/*
13131315
* Don't mark the inode dirty under folio lock. First, it unnecessarily
13141316
* makes the holding time of folio lock longer. Second, it forces lock
@@ -1423,8 +1425,10 @@ static int ext4_journalled_write_end(struct file *file,
14231425
folio_unlock(folio);
14241426
folio_put(folio);
14251427

1426-
if (old_size < pos && !verity)
1428+
if (old_size < pos && !verity) {
14271429
pagecache_isize_extended(inode, old_size, pos);
1430+
ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size);
1431+
}
14281432

14291433
if (size_changed) {
14301434
ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -2985,7 +2989,8 @@ static int ext4_da_do_write_end(struct address_space *mapping,
29852989
struct inode *inode = mapping->host;
29862990
loff_t old_size = inode->i_size;
29872991
bool disksize_changed = false;
2988-
loff_t new_i_size;
2992+
loff_t new_i_size, zero_len = 0;
2993+
handle_t *handle;
29892994

29902995
if (unlikely(!folio_buffers(folio))) {
29912996
folio_unlock(folio);
@@ -3029,18 +3034,21 @@ static int ext4_da_do_write_end(struct address_space *mapping,
30293034
folio_unlock(folio);
30303035
folio_put(folio);
30313036

3032-
if (old_size < pos)
3037+
if (pos > old_size) {
30333038
pagecache_isize_extended(inode, old_size, pos);
3039+
zero_len = pos - old_size;
3040+
}
30343041

3035-
if (disksize_changed) {
3036-
handle_t *handle;
3042+
if (!disksize_changed && !zero_len)
3043+
return copied;
30373044

3038-
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3039-
if (IS_ERR(handle))
3040-
return PTR_ERR(handle);
3041-
ext4_mark_inode_dirty(handle, inode);
3042-
ext4_journal_stop(handle);
3043-
}
3045+
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3046+
if (IS_ERR(handle))
3047+
return PTR_ERR(handle);
3048+
if (zero_len)
3049+
ext4_zero_partial_blocks(handle, inode, old_size, zero_len);
3050+
ext4_mark_inode_dirty(handle, inode);
3051+
ext4_journal_stop(handle);
30443052

30453053
return copied;
30463054
}
@@ -5426,6 +5434,14 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
54265434
}
54275435

54285436
if (attr->ia_size != inode->i_size) {
5437+
/* attach jbd2 jinode for EOF folio tail zeroing */
5438+
if (attr->ia_size & (inode->i_sb->s_blocksize - 1) ||
5439+
oldsize & (inode->i_sb->s_blocksize - 1)) {
5440+
error = ext4_inode_attach_jinode(inode);
5441+
if (error)
5442+
goto err_out;
5443+
}
5444+
54295445
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
54305446
if (IS_ERR(handle)) {
54315447
error = PTR_ERR(handle);
@@ -5436,12 +5452,17 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
54365452
orphan = 1;
54375453
}
54385454
/*
5439-
* Update c/mtime on truncate up, ext4_truncate() will
5440-
* update c/mtime in shrink case below
5455+
* Update c/mtime and tail zero the EOF folio on
5456+
* truncate up. ext4_truncate() handles the shrink case
5457+
* below.
54415458
*/
5442-
if (!shrink)
5459+
if (!shrink) {
54435460
inode_set_mtime_to_ts(inode,
54445461
inode_set_ctime_current(inode));
5462+
if (oldsize & (inode->i_sb->s_blocksize - 1))
5463+
ext4_block_truncate_page(handle,
5464+
inode->i_mapping, oldsize);
5465+
}
54455466

54465467
if (shrink)
54475468
ext4_fc_track_range(handle, inode,

0 commit comments

Comments
 (0)