Skip to content

Commit c7fc036

Browse files
Brian Fostertytso
authored andcommitted
ext4: partial zero eof block on unaligned inode size extension
Using mapped writes, it's technically possible to expose stale post-eof data on a truncate up operation. Consider the following example: $ xfs_io -fc "pwrite 0 2k" -c "mmap 0 4k" -c "mwrite 2k 2k" \ -c "truncate 8k" -c "pread -v 2k 16" <file> ... 00000800: 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 XXXXXXXXXXXXXXXX ... This shows that the post-eof data written via mwrite lands within EOF after a truncate up. While this is deliberate of the test case, behavior is somewhat unpredictable because writeback does post-eof zeroing, and writeback can occur at any time in the background. For example, an fsync inserted between the mwrite and truncate causes the subsequent read to instead return zeroes. This basically means that there is a race window in this situation between any subsequent extending operation and writeback that dictates whether post-eof data is exposed to the file or zeroed. To prevent this problem, perform partial block zeroing as part of the various inode size extending operations that are susceptible to it. For truncate extension, zero around the original eof similar to how truncate down does partial zeroing of the new eof. For extension via writes and fallocate related operations, zero the newly exposed range of the file to cover any partial zeroing that must occur at the original and new eof blocks. Signed-off-by: Brian Foster <[email protected]> Link: https://patch.msgid.link/[email protected] Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 25f51ea commit c7fc036

File tree

2 files changed

+42
-16
lines changed

2 files changed

+42
-16
lines changed

fs/ext4/extents.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4482,7 +4482,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
44824482
int depth = 0;
44834483
struct ext4_map_blocks map;
44844484
unsigned int credits;
4485-
loff_t epos;
4485+
loff_t epos, old_size = i_size_read(inode);
44864486

44874487
BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
44884488
map.m_lblk = offset;
@@ -4541,6 +4541,11 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
45414541
if (ext4_update_inode_size(inode, epos) & 0x1)
45424542
inode_set_mtime_to_ts(inode,
45434543
inode_get_ctime(inode));
4544+
if (epos > old_size) {
4545+
pagecache_isize_extended(inode, old_size, epos);
4546+
ext4_zero_partial_blocks(handle, inode,
4547+
old_size, epos - old_size);
4548+
}
45444549
}
45454550
ret2 = ext4_mark_inode_dirty(handle, inode);
45464551
ext4_update_inode_fsync_trans(handle, inode, 1);

fs/ext4/inode.c

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,8 +1314,10 @@ static int ext4_write_end(struct file *file,
13141314
folio_unlock(folio);
13151315
folio_put(folio);
13161316

1317-
if (old_size < pos && !verity)
1317+
if (old_size < pos && !verity) {
13181318
pagecache_isize_extended(inode, old_size, pos);
1319+
ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size);
1320+
}
13191321
/*
13201322
* Don't mark the inode dirty under folio lock. First, it unnecessarily
13211323
* makes the holding time of folio lock longer. Second, it forces lock
@@ -1430,8 +1432,10 @@ static int ext4_journalled_write_end(struct file *file,
14301432
folio_unlock(folio);
14311433
folio_put(folio);
14321434

1433-
if (old_size < pos && !verity)
1435+
if (old_size < pos && !verity) {
14341436
pagecache_isize_extended(inode, old_size, pos);
1437+
ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size);
1438+
}
14351439

14361440
if (size_changed) {
14371441
ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -2992,7 +2996,8 @@ static int ext4_da_do_write_end(struct address_space *mapping,
29922996
struct inode *inode = mapping->host;
29932997
loff_t old_size = inode->i_size;
29942998
bool disksize_changed = false;
2995-
loff_t new_i_size;
2999+
loff_t new_i_size, zero_len = 0;
3000+
handle_t *handle;
29963001

29973002
if (unlikely(!folio_buffers(folio))) {
29983003
folio_unlock(folio);
@@ -3036,18 +3041,21 @@ static int ext4_da_do_write_end(struct address_space *mapping,
30363041
folio_unlock(folio);
30373042
folio_put(folio);
30383043

3039-
if (old_size < pos)
3044+
if (pos > old_size) {
30403045
pagecache_isize_extended(inode, old_size, pos);
3046+
zero_len = pos - old_size;
3047+
}
30413048

3042-
if (disksize_changed) {
3043-
handle_t *handle;
3049+
if (!disksize_changed && !zero_len)
3050+
return copied;
30443051

3045-
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3046-
if (IS_ERR(handle))
3047-
return PTR_ERR(handle);
3048-
ext4_mark_inode_dirty(handle, inode);
3049-
ext4_journal_stop(handle);
3050-
}
3052+
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3053+
if (IS_ERR(handle))
3054+
return PTR_ERR(handle);
3055+
if (zero_len)
3056+
ext4_zero_partial_blocks(handle, inode, old_size, zero_len);
3057+
ext4_mark_inode_dirty(handle, inode);
3058+
ext4_journal_stop(handle);
30513059

30523060
return copied;
30533061
}
@@ -5433,6 +5441,14 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
54335441
}
54345442

54355443
if (attr->ia_size != inode->i_size) {
5444+
/* attach jbd2 jinode for EOF folio tail zeroing */
5445+
if (attr->ia_size & (inode->i_sb->s_blocksize - 1) ||
5446+
oldsize & (inode->i_sb->s_blocksize - 1)) {
5447+
error = ext4_inode_attach_jinode(inode);
5448+
if (error)
5449+
goto err_out;
5450+
}
5451+
54365452
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
54375453
if (IS_ERR(handle)) {
54385454
error = PTR_ERR(handle);
@@ -5443,12 +5459,17 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
54435459
orphan = 1;
54445460
}
54455461
/*
5446-
* Update c/mtime on truncate up, ext4_truncate() will
5447-
* update c/mtime in shrink case below
5462+
* Update c/mtime and tail zero the EOF folio on
5463+
* truncate up. ext4_truncate() handles the shrink case
5464+
* below.
54485465
*/
5449-
if (!shrink)
5466+
if (!shrink) {
54505467
inode_set_mtime_to_ts(inode,
54515468
inode_set_ctime_current(inode));
5469+
if (oldsize & (inode->i_sb->s_blocksize - 1))
5470+
ext4_block_truncate_page(handle,
5471+
inode->i_mapping, oldsize);
5472+
}
54525473

54535474
if (shrink)
54545475
ext4_fc_track_range(handle, inode,

0 commit comments

Comments
 (0)