Skip to content

Commit a3ce570

Browse files
LiBaokun96 authored and tytso committed
ext4: implement linear-like traversal across order xarrays
Although we now perform ordered traversal within an xarray, this is currently limited to a single xarray. However, we have multiple such xarrays, which prevents us from guaranteeing a linear-like traversal where all groups on the right are visited before all groups on the left. For example, suppose we have 128 block groups, with a target group of 64, a target length corresponding to an order of 1, and available free groups of 16 (order 1) and group 65 (order 8): For linear traversal, when no suitable free block is found in group 64, it will search in the next block group until group 127, then start searching from 0 up to block group 63. It ensures continuous forward traversal, which is consistent with the unidirectional rotation behavior of HDD platters. Additionally, the block group lock contention during freeing block is unavoidable. The goal increasing from 0 to 64 indicates that previously scanned groups (which had no suitable free space and are likely to free blocks later) and skipped groups (which are currently in use) have newly freed some used blocks. If we allocate blocks in these groups, the probability of competing with other processes increases. For non-linear traversal, we first traverse all groups in order_1. If only group 16 has free space in this list, we first traverse [63, 128), then traverse [0, 64) to find the available group 16, and then allocate blocks in group 16. Therefore, it cannot guarantee continuous traversal in one direction, thus increasing the probability of contention. So refactor ext4_mb_scan_groups_xarray() to ext4_mb_scan_groups_xa_range() to only traverse a fixed range of groups, and move the logic for handling wrap around to the caller. The caller first iterates through all xarrays in the range [start, ngroups) and then through the range [0, start). This approach simulates a linear scan, which reduces contention between freeing blocks and allocating blocks. 
Assume we have the following groups, where "|" denotes the xarray traversal start position:

order_1_groups: AB | CD
order_2_groups: EF | GH

Traversal order:
Before: C > D > A > B > G > H > E > F
After:  C > D > G > H > A > B > E > F

Performance test data follows:

|CPU: Kunpeng 920   |          P80           |            P1           |
|Memory: 512GB      |------------------------|-------------------------|
|960GB SSD (0.5GB/s)| base  |    patched     |  base  |    patched     |
|-------------------|-------|----------------|--------|----------------|
|mb_optimize_scan=0 | 19555 | 20049 (+2.5%)  | 315636 | 316724 (-0.3%) |
|mb_optimize_scan=1 | 15496 | 19342 (+24.8%) | 323569 | 328324 (+1.4%) |

|CPU: AMD 9654 * 2  |          P96           |            P1           |
|Memory: 1536GB     |------------------------|-------------------------|
|960GB SSD (1GB/s)  | base  |    patched     |  base  |    patched     |
|-------------------|-------|----------------|--------|----------------|
|mb_optimize_scan=0 | 53192 | 52125 (-2.0%)  | 212678 | 215136 (+1.1%) |
|mb_optimize_scan=1 | 37636 | 50331 (+33.7%) | 214189 | 209431 (-2.2%) |

Signed-off-by: Baokun Li <[email protected]>
Reviewed-by: Zhang Yi <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 6347558 commit a3ce570

File tree

1 file changed

+47
-21
lines changed

1 file changed

+47
-21
lines changed

fs/ext4/mballoc.c

Lines changed: 47 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -875,21 +875,20 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
875875
}
876876
}
877877

878-
static int ext4_mb_scan_groups_xarray(struct ext4_allocation_context *ac,
879-
struct xarray *xa, ext4_group_t start)
878+
static int ext4_mb_scan_groups_xa_range(struct ext4_allocation_context *ac,
879+
struct xarray *xa,
880+
ext4_group_t start, ext4_group_t end)
880881
{
881882
struct super_block *sb = ac->ac_sb;
882883
struct ext4_sb_info *sbi = EXT4_SB(sb);
883884
enum criteria cr = ac->ac_criteria;
884885
ext4_group_t ngroups = ext4_get_groups_count(sb);
885886
unsigned long group = start;
886-
ext4_group_t end = ngroups;
887887
struct ext4_group_info *grp;
888888

889-
if (WARN_ON_ONCE(start >= end))
889+
if (WARN_ON_ONCE(end > ngroups || start >= end))
890890
return 0;
891891

892-
wrap_around:
893892
xa_for_each_range(xa, group, grp, start, end - 1) {
894893
int err;
895894

@@ -903,28 +902,23 @@ static int ext4_mb_scan_groups_xarray(struct ext4_allocation_context *ac,
903902
cond_resched();
904903
}
905904

906-
if (start) {
907-
end = start;
908-
start = 0;
909-
goto wrap_around;
910-
}
911-
912905
return 0;
913906
}
914907

915908
/*
916909
* Find a suitable group of given order from the largest free orders xarray.
917910
*/
918-
static int
919-
ext4_mb_scan_groups_largest_free_order(struct ext4_allocation_context *ac,
920-
int order, ext4_group_t start)
911+
static inline int
912+
ext4_mb_scan_groups_largest_free_order_range(struct ext4_allocation_context *ac,
913+
int order, ext4_group_t start,
914+
ext4_group_t end)
921915
{
922916
struct xarray *xa = &EXT4_SB(ac->ac_sb)->s_mb_largest_free_orders[order];
923917

924918
if (xa_empty(xa))
925919
return 0;
926920

927-
return ext4_mb_scan_groups_xarray(ac, xa, start);
921+
return ext4_mb_scan_groups_xa_range(ac, xa, start, end);
928922
}
929923

930924
/*
@@ -937,12 +931,22 @@ static int ext4_mb_scan_groups_p2_aligned(struct ext4_allocation_context *ac,
937931
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
938932
int i;
939933
int ret = 0;
934+
ext4_group_t start, end;
940935

936+
start = group;
937+
end = ext4_get_groups_count(ac->ac_sb);
938+
wrap_around:
941939
for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
942-
ret = ext4_mb_scan_groups_largest_free_order(ac, i, group);
940+
ret = ext4_mb_scan_groups_largest_free_order_range(ac, i,
941+
start, end);
943942
if (ret || ac->ac_status != AC_STATUS_CONTINUE)
944943
return ret;
945944
}
945+
if (start) {
946+
end = start;
947+
start = 0;
948+
goto wrap_around;
949+
}
946950

947951
if (sbi->s_mb_stats)
948952
atomic64_inc(&sbi->s_bal_cX_failed[ac->ac_criteria]);
@@ -955,15 +959,17 @@ static int ext4_mb_scan_groups_p2_aligned(struct ext4_allocation_context *ac,
955959
/*
956960
* Find a suitable group of given order from the average fragments xarray.
957961
*/
958-
static int ext4_mb_scan_groups_avg_frag_order(struct ext4_allocation_context *ac,
959-
int order, ext4_group_t start)
962+
static int
963+
ext4_mb_scan_groups_avg_frag_order_range(struct ext4_allocation_context *ac,
964+
int order, ext4_group_t start,
965+
ext4_group_t end)
960966
{
961967
struct xarray *xa = &EXT4_SB(ac->ac_sb)->s_mb_avg_fragment_size[order];
962968

963969
if (xa_empty(xa))
964970
return 0;
965971

966-
return ext4_mb_scan_groups_xarray(ac, xa, start);
972+
return ext4_mb_scan_groups_xa_range(ac, xa, start, end);
967973
}
968974

969975
/*
@@ -975,13 +981,23 @@ static int ext4_mb_scan_groups_goal_fast(struct ext4_allocation_context *ac,
975981
{
976982
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
977983
int i, ret = 0;
984+
ext4_group_t start, end;
978985

986+
start = group;
987+
end = ext4_get_groups_count(ac->ac_sb);
988+
wrap_around:
979989
i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len);
980990
for (; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
981-
ret = ext4_mb_scan_groups_avg_frag_order(ac, i, group);
991+
ret = ext4_mb_scan_groups_avg_frag_order_range(ac, i,
992+
start, end);
982993
if (ret || ac->ac_status != AC_STATUS_CONTINUE)
983994
return ret;
984995
}
996+
if (start) {
997+
end = start;
998+
start = 0;
999+
goto wrap_around;
1000+
}
9851001

9861002
if (sbi->s_mb_stats)
9871003
atomic64_inc(&sbi->s_bal_cX_failed[ac->ac_criteria]);
@@ -1017,6 +1033,7 @@ static int ext4_mb_scan_groups_best_avail(struct ext4_allocation_context *ac,
10171033
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
10181034
int i, order, min_order;
10191035
unsigned long num_stripe_clusters = 0;
1036+
ext4_group_t start, end;
10201037

10211038
/*
10221039
* mb_avg_fragment_size_order() returns order in a way that makes
@@ -1048,6 +1065,9 @@ static int ext4_mb_scan_groups_best_avail(struct ext4_allocation_context *ac,
10481065
if (1 << min_order < ac->ac_o_ex.fe_len)
10491066
min_order = fls(ac->ac_o_ex.fe_len);
10501067

1068+
start = group;
1069+
end = ext4_get_groups_count(ac->ac_sb);
1070+
wrap_around:
10511071
for (i = order; i >= min_order; i--) {
10521072
int frag_order;
10531073
/*
@@ -1070,10 +1090,16 @@ static int ext4_mb_scan_groups_best_avail(struct ext4_allocation_context *ac,
10701090
frag_order = mb_avg_fragment_size_order(ac->ac_sb,
10711091
ac->ac_g_ex.fe_len);
10721092

1073-
ret = ext4_mb_scan_groups_avg_frag_order(ac, frag_order, group);
1093+
ret = ext4_mb_scan_groups_avg_frag_order_range(ac, frag_order,
1094+
start, end);
10741095
if (ret || ac->ac_status != AC_STATUS_CONTINUE)
10751096
return ret;
10761097
}
1098+
if (start) {
1099+
end = start;
1100+
start = 0;
1101+
goto wrap_around;
1102+
}
10771103

10781104
/* Reset goal length to original goal length before falling into CR_GOAL_LEN_SLOW */
10791105
ac->ac_g_ex.fe_len = ac->ac_orig_goal_len;

0 commit comments

Comments
 (0)