Skip to content

Commit 33e728c

Browse files
Kemeng Shitytso
authored andcommitted
ext4: Separate block bitmap and buddy bitmap freeing in ext4_mb_clear_bb()
This patch separates block bitmap and buddy bitmap freeing in order to update block bitmap with ext4_mb_mark_context in following patch. Separated freeing is safe with concurrent allocation as long as: 1. Firstly allocate block in buddy bitmap, and then in block bitmap. 2. Firstly free block in block bitmap, and then buddy bitmap. Then freed block will only be available to allocation when both buddy bitmap and block bitmap are updated by freeing. Allocation obeys rule 1 already, just do sperated freeing with rule 2. Separated freeing has no race with generate_buddy as: Once ext4_mb_load_buddy_gfp is executed successfully, the update-to-date buddy page can be found in sbi->s_buddy_cache and no more buddy initialization of the buddy page will be executed concurrently until buddy page is unloaded. As we always do free in "load buddy, free, unload buddy" sequence, separated freeing has no race with generate_buddy. Signed-off-by: Kemeng Shi <[email protected]> Reviewed-by: "Ritesh Harjani (IBM)" <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 2f94711 commit 33e728c

File tree

1 file changed

+49
-49
lines changed

1 file changed

+49
-49
lines changed

fs/ext4/mballoc.c

Lines changed: 49 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6388,7 +6388,7 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
63886388
ext4_error(sb, "Freeing blocks in system zone - "
63896389
"Block = %llu, count = %lu", block, count);
63906390
/* err = 0. ext4_std_error should be a no op */
6391-
goto error_return;
6391+
goto error_out;
63926392
}
63936393
flags |= EXT4_FREE_BLOCKS_VALIDATED;
63946394

@@ -6412,31 +6412,39 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
64126412
flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
64136413
}
64146414
count_clusters = EXT4_NUM_B2C(sbi, count);
6415+
trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
6416+
6417+
/* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
6418+
err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
6419+
GFP_NOFS|__GFP_NOFAIL);
6420+
if (err)
6421+
goto error_out;
6422+
6423+
if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
6424+
!ext4_inode_block_valid(inode, block, count)) {
6425+
ext4_error(sb, "Freeing blocks in system zone - "
6426+
"Block = %llu, count = %lu", block, count);
6427+
/* err = 0. ext4_std_error should be a no op */
6428+
goto error_clean;
6429+
}
6430+
64156431
bitmap_bh = ext4_read_block_bitmap(sb, block_group);
64166432
if (IS_ERR(bitmap_bh)) {
64176433
err = PTR_ERR(bitmap_bh);
64186434
bitmap_bh = NULL;
6419-
goto error_return;
6435+
goto error_clean;
64206436
}
64216437
gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
64226438
if (!gdp) {
64236439
err = -EIO;
6424-
goto error_return;
6425-
}
6426-
6427-
if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
6428-
!ext4_inode_block_valid(inode, block, count)) {
6429-
ext4_error(sb, "Freeing blocks in system zone - "
6430-
"Block = %llu, count = %lu", block, count);
6431-
/* err = 0. ext4_std_error should be a no op */
6432-
goto error_return;
6440+
goto error_clean;
64336441
}
64346442

64356443
BUFFER_TRACE(bitmap_bh, "getting write access");
64366444
err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
64376445
EXT4_JTR_NONE);
64386446
if (err)
6439-
goto error_return;
6447+
goto error_clean;
64406448

64416449
/*
64426450
* We are about to modify some metadata. Call the journal APIs
@@ -6446,21 +6454,38 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
64466454
BUFFER_TRACE(gd_bh, "get_write_access");
64476455
err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE);
64486456
if (err)
6449-
goto error_return;
6457+
goto error_clean;
64506458
#ifdef AGGRESSIVE_CHECK
64516459
{
64526460
int i;
64536461
for (i = 0; i < count_clusters; i++)
64546462
BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
64556463
}
64566464
#endif
6457-
trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
6465+
ext4_lock_group(sb, block_group);
6466+
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
6467+
ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
6468+
ext4_free_group_clusters_set(sb, gdp, ret);
6469+
ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
6470+
ext4_group_desc_csum_set(sb, block_group, gdp);
6471+
ext4_unlock_group(sb, block_group);
64586472

6459-
/* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
6460-
err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
6461-
GFP_NOFS|__GFP_NOFAIL);
6462-
if (err)
6463-
goto error_return;
6473+
if (sbi->s_log_groups_per_flex) {
6474+
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
6475+
atomic64_add(count_clusters,
6476+
&sbi_array_rcu_deref(sbi, s_flex_groups,
6477+
flex_group)->free_clusters);
6478+
}
6479+
6480+
/* We dirtied the bitmap block */
6481+
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
6482+
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
6483+
6484+
/* And the group descriptor block */
6485+
BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
6486+
ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
6487+
if (!err)
6488+
err = ret;
64646489

64656490
/*
64666491
* We need to make sure we don't reuse the freed block until after the
@@ -6484,13 +6509,8 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
64846509
new_entry->efd_tid = handle->h_transaction->t_tid;
64856510

64866511
ext4_lock_group(sb, block_group);
6487-
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
64886512
ext4_mb_free_metadata(handle, &e4b, new_entry);
64896513
} else {
6490-
/* need to update group_info->bb_free and bitmap
6491-
* with group lock held. generate_buddy look at
6492-
* them with group lock_held
6493-
*/
64946514
if (test_opt(sb, DISCARD)) {
64956515
err = ext4_issue_discard(sb, block_group, bit,
64966516
count_clusters, NULL);
@@ -6503,23 +6523,11 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
65036523
EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
65046524

65056525
ext4_lock_group(sb, block_group);
6506-
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
65076526
mb_free_blocks(inode, &e4b, bit, count_clusters);
65086527
}
65096528

6510-
ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
6511-
ext4_free_group_clusters_set(sb, gdp, ret);
6512-
ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
6513-
ext4_group_desc_csum_set(sb, block_group, gdp);
65146529
ext4_unlock_group(sb, block_group);
65156530

6516-
if (sbi->s_log_groups_per_flex) {
6517-
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
6518-
atomic64_add(count_clusters,
6519-
&sbi_array_rcu_deref(sbi, s_flex_groups,
6520-
flex_group)->free_clusters);
6521-
}
6522-
65236531
/*
65246532
* on a bigalloc file system, defer the s_freeclusters_counter
65256533
* update to the caller (ext4_remove_space and friends) so they
@@ -6532,28 +6540,20 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
65326540
count_clusters);
65336541
}
65346542

6535-
ext4_mb_unload_buddy(&e4b);
6536-
6537-
/* We dirtied the bitmap block */
6538-
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
6539-
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
6540-
6541-
/* And the group descriptor block */
6542-
BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
6543-
ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
6544-
if (!err)
6545-
err = ret;
6546-
65476543
if (overflow && !err) {
65486544
block += count;
65496545
count = overflow;
6546+
ext4_mb_unload_buddy(&e4b);
65506547
put_bh(bitmap_bh);
65516548
/* The range changed so it's no longer validated */
65526549
flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
65536550
goto do_more;
65546551
}
6555-
error_return:
6552+
6553+
error_clean:
6554+
ext4_mb_unload_buddy(&e4b);
65566555
brelse(bitmap_bh);
6556+
error_out:
65576557
ext4_std_error(sb, err);
65586558
}
65596559

0 commit comments

Comments
 (0)