Skip to content

Commit 8f2c3b7

Browse files
LiBaokun96 authored and tytso committed
ext4: utilize multiple global goals to reduce contention
When allocating data blocks, if the first try (goal allocation) fails and stream allocation is on, it tries a global goal starting from the last group we used (s_mb_last_group). This helps cluster large files together to reduce free space fragmentation, and the data block contiguity also accelerates write-back to disk. However, when multiple processes allocate blocks, having just one global goal means they all fight over the same group. This drastically lowers the chances of extents merging and leads to much worse file fragmentation. To mitigate this multi-process contention, we now employ multiple global goals, with the number of goals being the minimum between the number of possible CPUs and one-quarter of the filesystem's total block group count. To ensure a consistent goal for each inode, we select the corresponding goal by taking the inode number modulo the total number of goals. Performance test data follows: Test: Running will-it-scale/fallocate2 on CPU-bound containers. Observation: Average fallocate operations per container per second. 
|CPU: Kunpeng 920   |          P80           |           P1            |
|Memory: 512GB      |------------------------|-------------------------|
|960GB SSD (0.5GB/s)| base  |    patched     |  base  |    patched     |
|-------------------|-------|----------------|--------|----------------|
|mb_optimize_scan=0 | 9636  | 19628 (+103%)  | 337597 | 320885 (-4.9%) |
|mb_optimize_scan=1 | 4834  | 7129 (+47.4%)  | 341440 | 321275 (-5.9%) |

|CPU: AMD 9654 * 2  |          P96           |           P1            |
|Memory: 1536GB     |------------------------|-------------------------|
|960GB SSD (1GB/s)  | base  |    patched     |  base  |    patched     |
|-------------------|-------|----------------|--------|----------------|
|mb_optimize_scan=0 | 22341 | 53760 (+140%)  | 219707 | 213145 (-2.9%) |
|mb_optimize_scan=1 | 9177  | 12716 (+38.5%) | 215732 | 215262 (+0.2%) |

Suggested-by: Jan Kara <[email protected]>
Signed-off-by: Baokun Li <[email protected]>
Reviewed-by: Zhang Yi <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 4b41deb commit 8f2c3b7

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

fs/ext4/ext4.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,14 +1629,16 @@ struct ext4_sb_info {
16291629
unsigned int s_mb_order2_reqs;
16301630
unsigned int s_mb_group_prealloc;
16311631
unsigned int s_max_dir_size_kb;
1632-
/* where last allocation was done - for stream allocation */
1633-
ext4_group_t s_mb_last_group;
16341632
unsigned int s_mb_prefetch;
16351633
unsigned int s_mb_prefetch_limit;
16361634
unsigned int s_mb_best_avail_max_trim_order;
16371635
unsigned int s_sb_update_sec;
16381636
unsigned int s_sb_update_kb;
16391637

1638+
/* where last allocation was done - for stream allocation */
1639+
ext4_group_t *s_mb_last_groups;
1640+
unsigned int s_mb_nr_global_goals;
1641+
16401642
/* stats for buddy allocator */
16411643
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
16421644
atomic_t s_bal_success; /* we found long enough chunks */

fs/ext4/mballoc.c

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2168,8 +2168,12 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
21682168
ac->ac_buddy_folio = e4b->bd_buddy_folio;
21692169
folio_get(ac->ac_buddy_folio);
21702170
/* store last allocated for subsequent stream allocation */
2171-
if (ac->ac_flags & EXT4_MB_STREAM_ALLOC)
2172-
WRITE_ONCE(sbi->s_mb_last_group, ac->ac_f_ex.fe_group);
2171+
if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2172+
int hash = ac->ac_inode->i_ino % sbi->s_mb_nr_global_goals;
2173+
2174+
WRITE_ONCE(sbi->s_mb_last_groups[hash], ac->ac_f_ex.fe_group);
2175+
}
2176+
21732177
/*
21742178
* As we've just preallocated more space than
21752179
* user requested originally, we store allocated
@@ -2842,7 +2846,9 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
28422846

28432847
/* if stream allocation is enabled, use global goal */
28442848
if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2845-
ac->ac_g_ex.fe_group = READ_ONCE(sbi->s_mb_last_group);
2849+
int hash = ac->ac_inode->i_ino % sbi->s_mb_nr_global_goals;
2850+
2851+
ac->ac_g_ex.fe_group = READ_ONCE(sbi->s_mb_last_groups[hash]);
28462852
ac->ac_g_ex.fe_start = -1;
28472853
ac->ac_flags &= ~EXT4_MB_HINT_TRY_GOAL;
28482854
}
@@ -3722,10 +3728,19 @@ int ext4_mb_init(struct super_block *sb)
37223728
sbi->s_mb_group_prealloc, EXT4_NUM_B2C(sbi, sbi->s_stripe));
37233729
}
37243730

3731+
sbi->s_mb_nr_global_goals = umin(num_possible_cpus(),
3732+
DIV_ROUND_UP(sbi->s_groups_count, 4));
3733+
sbi->s_mb_last_groups = kcalloc(sbi->s_mb_nr_global_goals,
3734+
sizeof(ext4_group_t), GFP_KERNEL);
3735+
if (sbi->s_mb_last_groups == NULL) {
3736+
ret = -ENOMEM;
3737+
goto out;
3738+
}
3739+
37253740
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
37263741
if (sbi->s_locality_groups == NULL) {
37273742
ret = -ENOMEM;
3728-
goto out;
3743+
goto out_free_last_groups;
37293744
}
37303745
for_each_possible_cpu(i) {
37313746
struct ext4_locality_group *lg;
@@ -3750,6 +3765,9 @@ int ext4_mb_init(struct super_block *sb)
37503765
out_free_locality_groups:
37513766
free_percpu(sbi->s_locality_groups);
37523767
sbi->s_locality_groups = NULL;
3768+
out_free_last_groups:
3769+
kfree(sbi->s_mb_last_groups);
3770+
sbi->s_mb_last_groups = NULL;
37533771
out:
37543772
kfree(sbi->s_mb_avg_fragment_size);
37553773
kfree(sbi->s_mb_avg_fragment_size_locks);
@@ -3854,6 +3872,7 @@ void ext4_mb_release(struct super_block *sb)
38543872
}
38553873

38563874
free_percpu(sbi->s_locality_groups);
3875+
kfree(sbi->s_mb_last_groups);
38573876
}
38583877

38593878
static inline int ext4_issue_discard(struct super_block *sb,

0 commit comments

Comments
 (0)