Skip to content

Commit 1cea5cf

Browse files
fdmanana authored and kdave committed
btrfs: ensure relocation never runs while we have send operations running
Relocation and send do not play well together: while send is running, a block group can be relocated, a transaction committed, and the respective disk extents re-allocated and written to, or discarded, just as send is about to do something with those extents.

This was explained in commit 9e96749 ("Btrfs: prevent send failures and crashes due to concurrent relocation"), which prevented balance and send from running in parallel. However, it did not address one remaining case where chunk relocation can happen: shrinking a device (and device deletion, which shrinks a device's size to 0 before deleting the device).

We now also have one more case where relocation is triggered: on zoned filesystems, partially used block groups get relocated by a background thread, introduced in commit 18bb8bb ("btrfs: zoned: automatically reclaim zones").

So, instead of preventing balance from running when there are ongoing send operations, make sure we prevent relocation from happening. This uses the infrastructure recently added by the patch with the subject "btrfs: add cancellable chunk relocation support".

This change also adds a spinlock used solely to enforce mutual exclusion between send and relocation. Previously fs_info->balance_mutex was used, which made an attempt to run send block while waiting for balance to finish, and that can take a long time on large filesystems.

Signed-off-by: Filipe Manana <[email protected]>
Reviewed-by: David Sterba <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent cbeaae4 commit 1cea5cf

File tree

6 files changed

+32
-19
lines changed

6 files changed

+32
-19
lines changed

fs/btrfs/block-group.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,7 +1491,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
14911491
container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
14921492
struct btrfs_block_group *bg;
14931493
struct btrfs_space_info *space_info;
1494-
int ret;
1494+
LIST_HEAD(again_list);
14951495

14961496
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
14971497
return;
@@ -1502,6 +1502,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
15021502
mutex_lock(&fs_info->reclaim_bgs_lock);
15031503
spin_lock(&fs_info->unused_bgs_lock);
15041504
while (!list_empty(&fs_info->reclaim_bgs)) {
1505+
int ret = 0;
1506+
15051507
bg = list_first_entry(&fs_info->reclaim_bgs,
15061508
struct btrfs_block_group,
15071509
bg_list);
@@ -1547,9 +1549,13 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
15471549
bg->start);
15481550

15491551
next:
1550-
btrfs_put_block_group(bg);
15511552
spin_lock(&fs_info->unused_bgs_lock);
1553+
if (ret == -EAGAIN && list_empty(&bg->bg_list))
1554+
list_add_tail(&bg->bg_list, &again_list);
1555+
else
1556+
btrfs_put_block_group(bg);
15521557
}
1558+
list_splice_tail(&again_list, &fs_info->reclaim_bgs);
15531559
spin_unlock(&fs_info->unused_bgs_lock);
15541560
mutex_unlock(&fs_info->reclaim_bgs_lock);
15551561
btrfs_exclop_finish(fs_info);

fs/btrfs/ctree.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -561,13 +561,13 @@ enum {
561561
/*
562562
* Indicate that balance has been set up from the ioctl and is in the
563563
* main phase. The fs_info::balance_ctl is initialized.
564-
* Set and cleared while holding fs_info::balance_mutex.
565564
*/
566565
BTRFS_FS_BALANCE_RUNNING,
567566

568567
/*
569568
* Indicate that relocation of a chunk has started, it's set per chunk
570569
* and is toggled between chunks.
570+
* Set, tested and cleared while holding fs_info::send_reloc_lock.
571571
*/
572572
BTRFS_FS_RELOC_RUNNING,
573573

@@ -995,9 +995,10 @@ struct btrfs_fs_info {
995995

996996
struct crypto_shash *csum_shash;
997997

998+
spinlock_t send_reloc_lock;
998999
/*
9991000
* Number of send operations in progress.
1000-
* Updated while holding fs_info::balance_mutex.
1001+
* Updated while holding fs_info::send_reloc_lock.
10011002
*/
10021003
int send_in_progress;
10031004

fs/btrfs/disk-io.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2999,6 +2999,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
29992999
spin_lock_init(&fs_info->swapfile_pins_lock);
30003000
fs_info->swapfile_pins = RB_ROOT;
30013001

3002+
spin_lock_init(&fs_info->send_reloc_lock);
30023003
fs_info->send_in_progress = 0;
30033004

30043005
fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH;

fs/btrfs/relocation.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3789,14 +3789,25 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
37893789
* 0 success
37903790
* -EINPROGRESS operation is already in progress, that's probably a bug
37913791
* -ECANCELED cancellation request was set before the operation started
3792+
* -EAGAIN can not start because there are ongoing send operations
37923793
*/
37933794
static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
37943795
{
3796+
spin_lock(&fs_info->send_reloc_lock);
3797+
if (fs_info->send_in_progress) {
3798+
btrfs_warn_rl(fs_info,
3799+
"cannot run relocation while send operations are in progress (%d in progress)",
3800+
fs_info->send_in_progress);
3801+
spin_unlock(&fs_info->send_reloc_lock);
3802+
return -EAGAIN;
3803+
}
37953804
if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
37963805
/* This should not happen */
3806+
spin_unlock(&fs_info->send_reloc_lock);
37973807
btrfs_err(fs_info, "reloc already running, cannot start");
37983808
return -EINPROGRESS;
37993809
}
3810+
spin_unlock(&fs_info->send_reloc_lock);
38003811

38013812
if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
38023813
btrfs_info(fs_info, "chunk relocation canceled on start");
@@ -3818,7 +3829,9 @@ static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
38183829
/* Requested after start, clear bit first so any waiters can continue */
38193830
if (atomic_read(&fs_info->reloc_cancel_req) > 0)
38203831
btrfs_info(fs_info, "chunk relocation canceled during operation");
3832+
spin_lock(&fs_info->send_reloc_lock);
38213833
clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
3834+
spin_unlock(&fs_info->send_reloc_lock);
38223835
atomic_set(&fs_info->reloc_cancel_req, 0);
38233836
}
38243837

fs/btrfs/send.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7416,23 +7416,23 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
74167416
if (ret)
74177417
goto out;
74187418

7419-
mutex_lock(&fs_info->balance_mutex);
7420-
if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
7421-
mutex_unlock(&fs_info->balance_mutex);
7419+
spin_lock(&fs_info->send_reloc_lock);
7420+
if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
7421+
spin_unlock(&fs_info->send_reloc_lock);
74227422
btrfs_warn_rl(fs_info,
7423-
"cannot run send because a balance operation is in progress");
7423+
"cannot run send because a relocation operation is in progress");
74247424
ret = -EAGAIN;
74257425
goto out;
74267426
}
74277427
fs_info->send_in_progress++;
7428-
mutex_unlock(&fs_info->balance_mutex);
7428+
spin_unlock(&fs_info->send_reloc_lock);
74297429

74307430
current->journal_info = BTRFS_SEND_TRANS_STUB;
74317431
ret = send_subvol(sctx);
74327432
current->journal_info = NULL;
7433-
mutex_lock(&fs_info->balance_mutex);
7433+
spin_lock(&fs_info->send_reloc_lock);
74347434
fs_info->send_in_progress--;
7435-
mutex_unlock(&fs_info->balance_mutex);
7435+
spin_unlock(&fs_info->send_reloc_lock);
74367436
if (ret < 0)
74377437
goto out;
74387438

fs/btrfs/volumes.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4217,14 +4217,6 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
42174217
btrfs_bg_type_to_raid_name(data_target));
42184218
}
42194219

4220-
if (fs_info->send_in_progress) {
4221-
btrfs_warn_rl(fs_info,
4222-
"cannot run balance while send operations are in progress (%d in progress)",
4223-
fs_info->send_in_progress);
4224-
ret = -EAGAIN;
4225-
goto out;
4226-
}
4227-
42284220
ret = insert_balance_item(fs_info, bctl);
42294221
if (ret && ret != -EEXIST)
42304222
goto out;

0 commit comments

Comments
 (0)