Skip to content

Commit f7eaacb

Browse files
LiBaokun96 authored and tytso committed
ext4: convert free groups order lists to xarrays
While traversing the list, holding a spin_lock prevents load_buddy, making direct use of ext4_try_lock_group impossible. This can lead to a bouncing scenario where spin_is_locked(grp_A) succeeds, but ext4_try_lock_group() fails, forcing the list traversal to repeatedly restart from grp_A. In contrast, linear traversal directly uses ext4_try_lock_group(), avoiding this bouncing. Therefore, we need a lockless, ordered traversal to achieve linear-like efficiency. Therefore, this commit converts both average fragment size lists and largest free order lists into ordered xarrays. In an xarray, the index represents the block group number and the value holds the block group information; a non-empty value indicates the block group's presence. While insertion and deletion complexity remain O(1), lookup complexity changes from O(1) to O(nlogn), which may slightly reduce single-threaded performance. Additionally, xarray insertions might fail, potentially due to memory allocation issues. However, since we have linear traversal as a fallback, this isn't a major problem. Therefore, we've only added a warning message for insertion failures here. A helper function ext4_mb_find_good_group_xarray() is added to find good groups in the specified xarray starting at the specified position start, and when it reaches ngroups-1, it wraps around to 0 and then to start-1. This ensures an ordered traversal within the xarray. Performance test results are as follows: Single-process operations on an empty disk show negligible impact, while multi-process workloads demonstrate a noticeable performance gain. 
|CPU: Kunpeng 920   |          P80           |           P1            |
|Memory: 512GB      |------------------------|-------------------------|
|960GB SSD (0.5GB/s)| base  |    patched     |  base  |    patched     |
|-------------------|-------|----------------|--------|----------------|
|mb_optimize_scan=0 | 20097 | 19555 (-2.6%)  | 316141 | 315636 (-0.2%) |
|mb_optimize_scan=1 | 13318 | 15496 (+16.3%) | 325273 | 323569 (-0.5%) |

|CPU: AMD 9654 * 2  |          P96           |           P1            |
|Memory: 1536GB     |------------------------|-------------------------|
|960GB SSD (1GB/s)  | base  |    patched     |  base  |    patched     |
|-------------------|-------|----------------|--------|----------------|
|mb_optimize_scan=0 | 53603 | 53192 (-0.7%)  | 214243 | 212678 (-0.7%) |
|mb_optimize_scan=1 | 20887 | 37636 (+80.1%) | 213632 | 214189 (+0.2%) |

[ Applied spelling fixes per discussion on the ext4-list; see thread referenced in the Link tag. --tytso]

Signed-off-by: Baokun Li <[email protected]>
Reviewed-by: Zhang Yi <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 9c08e42 commit f7eaacb

File tree

3 files changed

+140
-126
lines changed

3 files changed

+140
-126
lines changed

fs/ext4/ext4.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,10 +1608,8 @@ struct ext4_sb_info {
16081608
struct list_head s_discard_list;
16091609
struct work_struct s_discard_work;
16101610
atomic_t s_retry_alloc_pending;
1611-
struct list_head *s_mb_avg_fragment_size;
1612-
rwlock_t *s_mb_avg_fragment_size_locks;
1613-
struct list_head *s_mb_largest_free_orders;
1614-
rwlock_t *s_mb_largest_free_orders_locks;
1611+
struct xarray *s_mb_avg_fragment_size;
1612+
struct xarray *s_mb_largest_free_orders;
16151613

16161614
/* tunables */
16171615
unsigned long s_stripe;
@@ -3485,8 +3483,6 @@ struct ext4_group_info {
34853483
void *bb_bitmap;
34863484
#endif
34873485
struct rw_semaphore alloc_sem;
3488-
struct list_head bb_avg_fragment_size_node;
3489-
struct list_head bb_largest_free_order_node;
34903486
ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
34913487
* regions, index is order.
34923488
* bb_counters[3] = 5 means

fs/ext4/mballoc-test.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -805,8 +805,6 @@ static void test_mb_mark_used(struct kunit *test)
805805
grp->bb_free = EXT4_CLUSTERS_PER_GROUP(sb);
806806
grp->bb_largest_free_order = -1;
807807
grp->bb_avg_fragment_size_order = -1;
808-
INIT_LIST_HEAD(&grp->bb_largest_free_order_node);
809-
INIT_LIST_HEAD(&grp->bb_avg_fragment_size_node);
810808
mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
811809
for (i = 0; i < TEST_RANGE_COUNT; i++)
812810
test_mb_mark_used_range(test, &e4b, ranges[i].start,
@@ -882,8 +880,6 @@ static void test_mb_free_blocks(struct kunit *test)
882880
grp->bb_free = 0;
883881
grp->bb_largest_free_order = -1;
884882
grp->bb_avg_fragment_size_order = -1;
885-
INIT_LIST_HEAD(&grp->bb_largest_free_order_node);
886-
INIT_LIST_HEAD(&grp->bb_avg_fragment_size_node);
887883
memset(bitmap, 0xff, sb->s_blocksize);
888884

889885
mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);

0 commit comments

Comments
 (0)