Skip to content

Commit c51e9aa

Browse files
lorddoskiasgregkh
authored andcommitted
btrfs: Ensure replaced device doesn't have pending chunk allocation
commit debd1c0 upstream. Recent FITRIM work, namely bbbf724 ("btrfs: combine device update operations during transaction commit") combined the way certain operations are recoded in a transaction. As a result an ASSERT was added in dev_replace_finish to ensure the new code works correctly. Unfortunately I got reports that it's possible to trigger the assert, meaning that during a device replace it's possible to have an unfinished chunk allocation on the source device. This is supposed to be prevented by the fact that a transaction is committed before finishing the replace oepration and alter acquiring the chunk mutex. This is not sufficient since by the time the transaction is committed and the chunk mutex acquired it's possible to allocate a chunk depending on the workload being executed on the replaced device. This bug has been present ever since device replace was introduced but there was never code which checks for it. The correct way to fix is to ensure that there is no pending device modification operation when the chunk mutex is acquire and if there is repeat transaction commit. Unfortunately it's not possible to just exclude the source device from btrfs_fs_devices::dev_alloc_list since this causes ENOSPC to be hit in transaction commit. Fixing that in another way would need to add special cases to handle the last writes and forbid new ones. The looped transaction fix is more obvious, and can be easily backported. The runtime of dev-replace is long so there's no noticeable delay caused by that. Reported-by: David Sterba <[email protected]> Fixes: 391cd9d ("Btrfs: fix unprotected alloc list insertion during the finishing procedure of replace") CC: [email protected] # 4.4+ Signed-off-by: Nikolay Borisov <[email protected]> Reviewed-by: David Sterba <[email protected]> Signed-off-by: David Sterba <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent e537ba0 commit c51e9aa

File tree

3 files changed

+26
-10
lines changed

3 files changed

+26
-10
lines changed

fs/btrfs/dev-replace.c

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -511,18 +511,27 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
511511
}
512512
btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
513513

514-
trans = btrfs_start_transaction(root, 0);
515-
if (IS_ERR(trans)) {
516-
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
517-
return PTR_ERR(trans);
514+
while (1) {
515+
trans = btrfs_start_transaction(root, 0);
516+
if (IS_ERR(trans)) {
517+
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
518+
return PTR_ERR(trans);
519+
}
520+
ret = btrfs_commit_transaction(trans, root);
521+
WARN_ON(ret);
522+
mutex_lock(&uuid_mutex);
523+
/* keep away write_all_supers() during the finishing procedure */
524+
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
525+
mutex_lock(&root->fs_info->chunk_mutex);
526+
if (src_device->has_pending_chunks) {
527+
mutex_unlock(&root->fs_info->chunk_mutex);
528+
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
529+
mutex_unlock(&uuid_mutex);
530+
} else {
531+
break;
532+
}
518533
}
519-
ret = btrfs_commit_transaction(trans, root);
520-
WARN_ON(ret);
521534

522-
mutex_lock(&uuid_mutex);
523-
/* keep away write_all_supers() during the finishing procedure */
524-
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
525-
mutex_lock(&root->fs_info->chunk_mutex);
526535
btrfs_dev_replace_lock(dev_replace, 1);
527536
dev_replace->replace_state =
528537
scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED

fs/btrfs/volumes.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4876,6 +4876,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
48764876
for (i = 0; i < map->num_stripes; i++) {
48774877
num_bytes = map->stripes[i].dev->bytes_used + stripe_size;
48784878
btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes);
4879+
map->stripes[i].dev->has_pending_chunks = true;
48794880
}
48804881

48814882
spin_lock(&extent_root->fs_info->free_chunk_lock);
@@ -7250,6 +7251,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root,
72507251
for (i = 0; i < map->num_stripes; i++) {
72517252
dev = map->stripes[i].dev;
72527253
dev->commit_bytes_used = dev->bytes_used;
7254+
dev->has_pending_chunks = false;
72537255
}
72547256
}
72557257
unlock_chunks(root);

fs/btrfs/volumes.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ struct btrfs_device {
6262

6363
spinlock_t io_lock ____cacheline_aligned;
6464
int running_pending;
65+
/* When true means this device has pending chunk alloc in
66+
* current transaction. Protected by chunk_mutex.
67+
*/
68+
bool has_pending_chunks;
69+
6570
/* regular prio bios */
6671
struct btrfs_pending_bios pending_bios;
6772
/* WRITE_SYNC bios */

0 commit comments

Comments
 (0)