Skip to content

Commit e7f101a

Browse files
LiBaokun96 authored and tytso committed
ext4: merge freed extent with existing extents before insertion
Attempt to merge ext4_free_data with already inserted free extents prior
to adding new ones. This strategy drastically cuts down the number of
times locks are held.

For example, if prev, new, and next extents are all mergeable, the existing
code (before this patch) requires acquiring the s_md_lock three times:

  prev merge into new and free prev // hold lock
  next merge into new and free next // hold lock
  insert new // hold lock

After the patch, it only needs to be acquired once:

  new merge into next and free new // no lock
  next merge into prev and free next // hold lock

Performance test data follows:

Test: Running will-it-scale/fallocate2 on CPU-bound containers.
Observation: Average fallocate operations per container per second.

|CPU: Kunpeng 920   |          P80           |            P1           |
|Memory: 512GB      |------------------------|-------------------------|
|960GB SSD (0.5GB/s)| base  |    patched     | base   |    patched     |
|-------------------|-------|----------------|--------|----------------|
|mb_optimize_scan=0 | 20043 | 20097 (+0.2%)  | 314331 | 316141 (+0.5%) |
|mb_optimize_scan=1 | 7290  | 13318 (+87.4%) | 324226 | 325273 (+0.3%) |

|CPU: AMD 9654 * 2  |          P96           |            P1           |
|Memory: 1536GB     |------------------------|-------------------------|
|960GB SSD (1GB/s)  | base  |    patched     | base   |    patched     |
|-------------------|-------|----------------|--------|----------------|
|mb_optimize_scan=0 | 54999 | 53603 (-2.5%)  | 214380 | 214243 (-0.06%)|
|mb_optimize_scan=1 | 13497 | 20887 (+54.6%) | 216276 | 213632 (-1.2%) |

Signed-off-by: Baokun Li <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Reviewed-by: Zhang Yi <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 0a2326f commit e7f101a

File tree

1 file changed

+76
-37
lines changed

1 file changed

+76
-37
lines changed

fs/ext4/mballoc.c

Lines changed: 76 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -6307,28 +6307,63 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
63076307
* are contiguous, AND the extents were freed by the same transaction,
63086308
* AND the blocks are associated with the same group.
63096309
*/
6310-
static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
6311-
struct ext4_free_data *entry,
6312-
struct ext4_free_data *new_entry,
6313-
struct rb_root *entry_rb_root)
6310+
static inline bool
6311+
ext4_freed_extents_can_be_merged(struct ext4_free_data *entry1,
6312+
struct ext4_free_data *entry2)
63146313
{
6315-
if ((entry->efd_tid != new_entry->efd_tid) ||
6316-
(entry->efd_group != new_entry->efd_group))
6317-
return;
6318-
if (entry->efd_start_cluster + entry->efd_count ==
6319-
new_entry->efd_start_cluster) {
6320-
new_entry->efd_start_cluster = entry->efd_start_cluster;
6321-
new_entry->efd_count += entry->efd_count;
6322-
} else if (new_entry->efd_start_cluster + new_entry->efd_count ==
6323-
entry->efd_start_cluster) {
6324-
new_entry->efd_count += entry->efd_count;
6325-
} else
6326-
return;
6314+
if (entry1->efd_tid != entry2->efd_tid)
6315+
return false;
6316+
if (entry1->efd_start_cluster + entry1->efd_count !=
6317+
entry2->efd_start_cluster)
6318+
return false;
6319+
if (WARN_ON_ONCE(entry1->efd_group != entry2->efd_group))
6320+
return false;
6321+
return true;
6322+
}
6323+
6324+
static inline void
6325+
ext4_merge_freed_extents(struct ext4_sb_info *sbi, struct rb_root *root,
6326+
struct ext4_free_data *entry1,
6327+
struct ext4_free_data *entry2)
6328+
{
6329+
entry1->efd_count += entry2->efd_count;
63276330
spin_lock(&sbi->s_md_lock);
6328-
list_del(&entry->efd_list);
6331+
list_del(&entry2->efd_list);
63296332
spin_unlock(&sbi->s_md_lock);
6330-
rb_erase(&entry->efd_node, entry_rb_root);
6331-
kmem_cache_free(ext4_free_data_cachep, entry);
6333+
rb_erase(&entry2->efd_node, root);
6334+
kmem_cache_free(ext4_free_data_cachep, entry2);
6335+
}
6336+
6337+
static inline void
6338+
ext4_try_merge_freed_extent_prev(struct ext4_sb_info *sbi, struct rb_root *root,
6339+
struct ext4_free_data *entry)
6340+
{
6341+
struct ext4_free_data *prev;
6342+
struct rb_node *node;
6343+
6344+
node = rb_prev(&entry->efd_node);
6345+
if (!node)
6346+
return;
6347+
6348+
prev = rb_entry(node, struct ext4_free_data, efd_node);
6349+
if (ext4_freed_extents_can_be_merged(prev, entry))
6350+
ext4_merge_freed_extents(sbi, root, prev, entry);
6351+
}
6352+
6353+
static inline void
6354+
ext4_try_merge_freed_extent_next(struct ext4_sb_info *sbi, struct rb_root *root,
6355+
struct ext4_free_data *entry)
6356+
{
6357+
struct ext4_free_data *next;
6358+
struct rb_node *node;
6359+
6360+
node = rb_next(&entry->efd_node);
6361+
if (!node)
6362+
return;
6363+
6364+
next = rb_entry(node, struct ext4_free_data, efd_node);
6365+
if (ext4_freed_extents_can_be_merged(entry, next))
6366+
ext4_merge_freed_extents(sbi, root, entry, next);
63326367
}
63336368

63346369
static noinline_for_stack void
@@ -6338,11 +6373,12 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
63386373
ext4_group_t group = e4b->bd_group;
63396374
ext4_grpblk_t cluster;
63406375
ext4_grpblk_t clusters = new_entry->efd_count;
6341-
struct ext4_free_data *entry;
6376+
struct ext4_free_data *entry = NULL;
63426377
struct ext4_group_info *db = e4b->bd_info;
63436378
struct super_block *sb = e4b->bd_sb;
63446379
struct ext4_sb_info *sbi = EXT4_SB(sb);
6345-
struct rb_node **n = &db->bb_free_root.rb_node, *node;
6380+
struct rb_root *root = &db->bb_free_root;
6381+
struct rb_node **n = &root->rb_node;
63466382
struct rb_node *parent = NULL, *new_node;
63476383

63486384
BUG_ON(!ext4_handle_valid(handle));
@@ -6378,27 +6414,30 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
63786414
}
63796415
}
63806416

6381-
rb_link_node(new_node, parent, n);
6382-
rb_insert_color(new_node, &db->bb_free_root);
6383-
6384-
/* Now try to see the extent can be merged to left and right */
6385-
node = rb_prev(new_node);
6386-
if (node) {
6387-
entry = rb_entry(node, struct ext4_free_data, efd_node);
6388-
ext4_try_merge_freed_extent(sbi, entry, new_entry,
6389-
&(db->bb_free_root));
6417+
atomic_add(clusters, &sbi->s_mb_free_pending);
6418+
if (!entry)
6419+
goto insert;
6420+
6421+
/* Now try to see the extent can be merged to prev and next */
6422+
if (ext4_freed_extents_can_be_merged(new_entry, entry)) {
6423+
entry->efd_start_cluster = cluster;
6424+
entry->efd_count += new_entry->efd_count;
6425+
kmem_cache_free(ext4_free_data_cachep, new_entry);
6426+
ext4_try_merge_freed_extent_prev(sbi, root, entry);
6427+
return;
63906428
}
6391-
6392-
node = rb_next(new_node);
6393-
if (node) {
6394-
entry = rb_entry(node, struct ext4_free_data, efd_node);
6395-
ext4_try_merge_freed_extent(sbi, entry, new_entry,
6396-
&(db->bb_free_root));
6429+
if (ext4_freed_extents_can_be_merged(entry, new_entry)) {
6430+
entry->efd_count += new_entry->efd_count;
6431+
kmem_cache_free(ext4_free_data_cachep, new_entry);
6432+
ext4_try_merge_freed_extent_next(sbi, root, entry);
6433+
return;
63976434
}
6435+
insert:
6436+
rb_link_node(new_node, parent, n);
6437+
rb_insert_color(new_node, root);
63986438

63996439
spin_lock(&sbi->s_md_lock);
64006440
list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list[new_entry->efd_tid & 1]);
6401-
atomic_add(clusters, &sbi->s_mb_free_pending);
64026441
spin_unlock(&sbi->s_md_lock);
64036442
}
64046443

0 commit comments

Comments
 (0)