Skip to content

Commit 5bb12b1

Browse files
riteshharjani authored and tytso committed
ext4: Add support for EXT4_GET_BLOCKS_QUERY_LEAF_BLOCKS
There can be a case where there are contiguous extents on the adjacent leaf nodes of on-disk extent trees. So when someone tries to write to this contiguous range, ext4_map_blocks() call will split by returning 1 extent at a time if this is not already cached in extent_status tree cache (where if these extents when cached can get merged since they are contiguous). This is fine for a normal write however in case of atomic writes, it can't afford to break the write into two. Now this is also something that will only happen in the slow write case where we call ext4_map_blocks() for each of these extents spread across different leaf nodes. However, there is no guarantee that these extent status cache cannot be reclaimed before the last call to ext4_map_blocks() in ext4_map_blocks_atomic_write_slow(). Hence this patch adds support of EXT4_GET_BLOCKS_QUERY_LEAF_BLOCKS. This flag checks if the requested range can be fully found in extent status cache and return. If not, it looks up in on-disk extent tree via ext4_map_query_blocks(). If the found extent is the last entry in the leaf node, then it goes and queries the next lblk to see if there is an adjacent contiguous extent in the adjacent leaf node of the on-disk extent tree. Even though there can be a case where there are multiple adjacent extent entries spread across multiple leaf nodes. But we only read an adjacent leaf block i.e. in total of 2 extent entries spread across 2 leaf nodes. The reason for this is that we are mostly only going to support atomic writes with upto 64KB or maybe max upto 1MB of atomic write support. Acked-by: Darrick J. Wong <[email protected]> Co-developed-by: Ojaswin Mujoo <[email protected]> Signed-off-by: Ojaswin Mujoo <[email protected]> Signed-off-by: Ritesh Harjani (IBM) <[email protected]> Link: https://patch.msgid.link/6bb563e661f5fbd80e266a9e6ce6e29178f555f6.1747337952.git.ritesh.list@gmail.com Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 255e7bc commit 5bb12b1

File tree

3 files changed

+112
-8
lines changed

3 files changed

+112
-8
lines changed

fs/ext4/ext4.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,9 +256,19 @@ struct ext4_allocation_request {
256256
#define EXT4_MAP_UNWRITTEN BIT(BH_Unwritten)
257257
#define EXT4_MAP_BOUNDARY BIT(BH_Boundary)
258258
#define EXT4_MAP_DELAYED BIT(BH_Delay)
259+
/*
260+
* This is for use in ext4_map_query_blocks() for a special case where we can
261+
* have a physically and logically contiguous blocks split across two leaf
262+
* nodes instead of a single extent. This is required in case of atomic writes
263+
* to know whether the returned extent is last in leaf. If yes, then lookup for
264+
* next in leaf block in ext4_map_query_blocks_next_in_leaf().
265+
* - This is never going to be added to any buffer head state.
266+
* - We use the next available bit after BH_BITMAP_UPTODATE.
267+
*/
268+
#define EXT4_MAP_QUERY_LAST_IN_LEAF BIT(BH_BITMAP_UPTODATE + 1)
259269
#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
260270
EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
261-
EXT4_MAP_DELAYED)
271+
EXT4_MAP_DELAYED | EXT4_MAP_QUERY_LAST_IN_LEAF)
262272

263273
struct ext4_map_blocks {
264274
ext4_fsblk_t m_pblk;
@@ -728,6 +738,12 @@ enum {
728738
EXT4_GET_BLOCKS_IO_SUBMIT)
729739
/* Caller is in the atomic contex, find extent if it has been cached */
730740
#define EXT4_GET_BLOCKS_CACHED_NOWAIT 0x0800
741+
/*
742+
* Atomic write caller needs this to query in the slow path of mixed mapping
743+
* case, when a contiguous extent can be split across two adjacent leaf nodes.
744+
* Look EXT4_MAP_QUERY_LAST_IN_LEAF.
745+
*/
746+
#define EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF 0x1000
731747

732748
/*
733749
* The bit position of these flags must not overlap with any of the

fs/ext4/extents.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4437,6 +4437,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
44374437
allocated = map->m_len;
44384438
ext4_ext_show_leaf(inode, path);
44394439
out:
4440+
/*
4441+
* We never use EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF with CREATE flag.
4442+
* So we know that the depth used here is correct, since there was no
4443+
* block allocation done if EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF is set.
4444+
* If tomorrow we start using this QUERY flag with CREATE, then we will
4445+
* need to re-calculate the depth as it might have changed due to block
4446+
* allocation.
4447+
*/
4448+
if (flags & EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF)
4449+
if (!err && ex && (ex == EXT_LAST_EXTENT(path[depth].p_hdr)))
4450+
map->m_flags |= EXT4_MAP_QUERY_LAST_IN_LEAF;
4451+
44404452
ext4_free_ext_path(path);
44414453

44424454
trace_ext4_ext_map_blocks_exit(inode, flags, map,

fs/ext4/inode.c

Lines changed: 83 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -483,15 +483,73 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
483483
}
484484
#endif /* ES_AGGRESSIVE_TEST */
485485

486+
/*
 * ext4_map_query_blocks_next_in_leaf - query the on-disk extent tree for an
 * extent adjacent to @map that lives in the next leaf block, and merge the
 * two into one extent_status cache entry when they are physically contiguous.
 *
 * Called (per the commit description) from ext4_map_query_blocks() only when
 * the extent just found for @map was the last entry in its leaf node
 * (EXT4_MAP_QUERY_LAST_IN_LEAF set) and @map->m_len is still shorter than the
 * originally requested length @orig_mlen — i.e. the requested range may
 * continue in the adjacent leaf.
 *
 * @handle:    journal handle passed through to ext4_ext_map_blocks()
 * @inode:     inode whose extent tree is being queried
 * @map:       in: the extent already found (m_lblk/m_len/m_pblk/m_flags);
 *             out: m_len is extended by the second extent's length when the
 *             two extents were merged into a single ES cache entry
 * @orig_mlen: the caller's original requested length, strictly greater than
 *             map->m_len on entry (enforced by WARN_ON_ONCE below)
 *
 * Returns map->m_len — the (possibly extended) mapped length. Never returns
 * an error: if the lookup of the adjacent range fails or finds nothing
 * (retval <= 0), only the first extent is inserted into the ES cache and its
 * length is returned.
 */
static int ext4_map_query_blocks_next_in_leaf(handle_t *handle,
		struct inode *inode, struct ext4_map_blocks *map,
		unsigned int orig_mlen)
{
	struct ext4_map_blocks map2;
	unsigned int status, status2;
	int retval;

	/* ES cache status of the already-found first extent */
	status = map->m_flags & EXT4_MAP_UNWRITTEN ?
		EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;

	/* Caller contract: first extent was last-in-leaf and short of the request */
	WARN_ON_ONCE(!(map->m_flags & EXT4_MAP_QUERY_LAST_IN_LEAF));
	WARN_ON_ONCE(orig_mlen <= map->m_len);

	/* Prepare map2 for lookup in next leaf block */
	map2.m_lblk = map->m_lblk + map->m_len;
	map2.m_len = orig_mlen - map->m_len;
	map2.m_flags = 0;
	/* flags == 0: pure query, no allocation */
	retval = ext4_ext_map_blocks(handle, inode, &map2, 0);

	if (retval <= 0) {
		/* No adjacent extent (or lookup error): cache just the first one */
		ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
				map->m_pblk, status, false);
		return map->m_len;
	}

	if (unlikely(retval != map2.m_len)) {
		/*
		 * NOTE(review): message text says "map->m_len" but the value
		 * printed is map2.m_len — presumably intentional copy of the
		 * assertion style used elsewhere; confirm against the other
		 * "ES len assertion" warnings in this file.
		 */
		ext4_warning(inode->i_sb,
			     "ES len assertion failed for inode "
			     "%lu: retval %d != map->m_len %d",
			     inode->i_ino, retval, map2.m_len);
		WARN_ON(1);
	}

	status2 = map2.m_flags & EXT4_MAP_UNWRITTEN ?
		EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;

	/*
	 * If map2 is contiguous with map, then let's insert it as a single
	 * extent in es cache and return the combined length of both the maps.
	 * Contiguity requires both matching physical blocks and matching
	 * written/unwritten status.
	 */
	if (map->m_pblk + map->m_len == map2.m_pblk &&
			status == status2) {
		ext4_es_insert_extent(inode, map->m_lblk,
				map->m_len + map2.m_len, map->m_pblk,
				status, false);
		map->m_len += map2.m_len;
	} else {
		/* Not mergeable: cache only the first extent */
		ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
				map->m_pblk, status, false);
	}

	return map->m_len;
}
540+
486541
static int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
487542
struct ext4_map_blocks *map, int flags)
488543
{
489544
unsigned int status;
490545
int retval;
546+
unsigned int orig_mlen = map->m_len;
547+
unsigned int query_flags = flags & EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF;
491548

492549
flags &= EXT4_EX_FILTER;
493550
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
494-
retval = ext4_ext_map_blocks(handle, inode, map, flags);
551+
retval = ext4_ext_map_blocks(handle, inode, map,
552+
flags | query_flags);
495553
else
496554
retval = ext4_ind_map_blocks(handle, inode, map, flags);
497555

@@ -506,11 +564,23 @@ static int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
506564
WARN_ON(1);
507565
}
508566

509-
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
510-
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
511-
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
512-
map->m_pblk, status, false);
513-
return retval;
567+
/*
568+
* No need to query next in leaf:
569+
* - if returned extent is not last in leaf or
570+
* - if the last in leaf is the full requested range
571+
*/
572+
if (!(map->m_flags & EXT4_MAP_QUERY_LAST_IN_LEAF) ||
573+
((map->m_flags & EXT4_MAP_QUERY_LAST_IN_LEAF) &&
574+
(map->m_len == orig_mlen))) {
575+
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
576+
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
577+
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
578+
map->m_pblk, status, false);
579+
return retval;
580+
}
581+
582+
return ext4_map_query_blocks_next_in_leaf(handle, inode, map,
583+
orig_mlen);
514584
}
515585

516586
static int ext4_map_create_blocks(handle_t *handle, struct inode *inode,
@@ -624,6 +694,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
624694
struct extent_status es;
625695
int retval;
626696
int ret = 0;
697+
unsigned int orig_mlen = map->m_len;
627698
#ifdef ES_AGGRESSIVE_TEST
628699
struct ext4_map_blocks orig_map;
629700

@@ -685,7 +756,12 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
685756
ext4_map_blocks_es_recheck(handle, inode, map,
686757
&orig_map, flags);
687758
#endif
688-
goto found;
759+
if (!(flags & EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF) ||
760+
orig_mlen == map->m_len)
761+
goto found;
762+
763+
if (flags & EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF)
764+
map->m_len = orig_mlen;
689765
}
690766
/*
691767
* In the query cache no-wait mode, nothing we can do more if we

0 commit comments

Comments
 (0)