Skip to content

Commit f4265b8

Browse files
zhangyi089brauner
authored and committed
ext4: add FALLOC_FL_WRITE_ZEROES support
Add support for FALLOC_FL_WRITE_ZEROES if the underlying device enables the unmap write zeroes operation. This first allocates blocks as unwritten, then issues a zero command outside of the running journal handle, and finally converts them to a written state. Signed-off-by: Zhang Yi <[email protected]> Link: https://lore.kernel.org/[email protected] Reviewed-by: "Martin K. Petersen" <[email protected]> Signed-off-by: Christian Brauner <[email protected]>
1 parent 912b603 commit f4265b8

File tree

2 files changed

+57
-12
lines changed

2 files changed

+57
-12
lines changed

fs/ext4/extents.c

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4501,6 +4501,8 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
45014501
struct ext4_map_blocks map;
45024502
unsigned int credits;
45034503
loff_t epos, old_size = i_size_read(inode);
4504+
unsigned int blkbits = inode->i_blkbits;
4505+
bool alloc_zero = false;
45044506

45054507
BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
45064508
map.m_lblk = offset;
@@ -4513,6 +4515,17 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
45134515
if (len <= EXT_UNWRITTEN_MAX_LEN)
45144516
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
45154517

4518+
/*
4519+
* Doing the actual zeroing during a running journal transaction
4520+
* costs a lot. First allocate an unwritten extent and then
4521+
* convert it to written after zeroing it out.
4522+
*/
4523+
if (flags & EXT4_GET_BLOCKS_ZERO) {
4524+
flags &= ~EXT4_GET_BLOCKS_ZERO;
4525+
flags |= EXT4_GET_BLOCKS_UNWRIT_EXT;
4526+
alloc_zero = true;
4527+
}
4528+
45164529
/*
45174530
* credits to insert 1 extent into extent tree
45184531
*/
@@ -4549,9 +4562,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
45494562
* allow a full retry cycle for any remaining allocations
45504563
*/
45514564
retries = 0;
4552-
map.m_lblk += ret;
4553-
map.m_len = len = len - ret;
4554-
epos = (loff_t)map.m_lblk << inode->i_blkbits;
4565+
epos = (loff_t)(map.m_lblk + ret) << blkbits;
45554566
inode_set_ctime_current(inode);
45564567
if (new_size) {
45574568
if (epos > new_size)
@@ -4571,6 +4582,21 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
45714582
ret2 = ret3 ? ret3 : ret2;
45724583
if (unlikely(ret2))
45734584
break;
4585+
4586+
if (alloc_zero &&
4587+
(map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN))) {
4588+
ret2 = ext4_issue_zeroout(inode, map.m_lblk, map.m_pblk,
4589+
map.m_len);
4590+
if (likely(!ret2))
4591+
ret2 = ext4_convert_unwritten_extents(NULL,
4592+
inode, (loff_t)map.m_lblk << blkbits,
4593+
(loff_t)map.m_len << blkbits);
4594+
if (ret2)
4595+
break;
4596+
}
4597+
4598+
map.m_lblk += ret;
4599+
map.m_len = len = len - ret;
45744600
}
45754601
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
45764602
goto retry;
@@ -4636,7 +4662,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
46364662
if (end_lblk > start_lblk) {
46374663
ext4_lblk_t zero_blks = end_lblk - start_lblk;
46384664

4639-
flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE);
4665+
if (mode & FALLOC_FL_WRITE_ZEROES)
4666+
flags = EXT4_GET_BLOCKS_CREATE_ZERO | EXT4_EX_NOCACHE;
4667+
else
4668+
flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4669+
EXT4_EX_NOCACHE);
46404670
ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks,
46414671
new_size, flags);
46424672
if (ret)
@@ -4745,11 +4775,18 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
47454775
if (IS_ENCRYPTED(inode) &&
47464776
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
47474777
return -EOPNOTSUPP;
4778+
/*
4779+
* Don't allow writing zeroes if the underlying device does not
4780+
* enable the unmap write zeroes operation.
4781+
*/
4782+
if ((mode & FALLOC_FL_WRITE_ZEROES) &&
4783+
!bdev_write_zeroes_unmap_sectors(inode->i_sb->s_bdev))
4784+
return -EOPNOTSUPP;
47484785

47494786
/* Return error if mode is not supported */
47504787
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4751-
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
4752-
FALLOC_FL_INSERT_RANGE))
4788+
FALLOC_FL_ZERO_RANGE | FALLOC_FL_COLLAPSE_RANGE |
4789+
FALLOC_FL_INSERT_RANGE | FALLOC_FL_WRITE_ZEROES))
47534790
return -EOPNOTSUPP;
47544791

47554792
inode_lock(inode);
@@ -4780,16 +4817,23 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
47804817
if (ret)
47814818
goto out_invalidate_lock;
47824819

4783-
if (mode & FALLOC_FL_PUNCH_HOLE)
4820+
switch (mode & FALLOC_FL_MODE_MASK) {
4821+
case FALLOC_FL_PUNCH_HOLE:
47844822
ret = ext4_punch_hole(file, offset, len);
4785-
else if (mode & FALLOC_FL_COLLAPSE_RANGE)
4823+
break;
4824+
case FALLOC_FL_COLLAPSE_RANGE:
47864825
ret = ext4_collapse_range(file, offset, len);
4787-
else if (mode & FALLOC_FL_INSERT_RANGE)
4826+
break;
4827+
case FALLOC_FL_INSERT_RANGE:
47884828
ret = ext4_insert_range(file, offset, len);
4789-
else if (mode & FALLOC_FL_ZERO_RANGE)
4829+
break;
4830+
case FALLOC_FL_ZERO_RANGE:
4831+
case FALLOC_FL_WRITE_ZEROES:
47904832
ret = ext4_zero_range(file, offset, len, mode);
4791-
else
4833+
break;
4834+
default:
47924835
ret = -EOPNOTSUPP;
4836+
}
47934837

47944838
out_invalidate_lock:
47954839
filemap_invalidate_unlock(mapping);

include/trace/events/ext4.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
9292
{ FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \
9393
{ FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \
9494
{ FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
95-
{ FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
95+
{ FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}, \
96+
{ FALLOC_FL_WRITE_ZEROES, "WRITE_ZEROES"})
9697

9798
TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR);
9899
TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME);

0 commit comments

Comments
 (0)