Skip to content

Commit 60891ec

Browse files
committed
Merge tag 'for-6.0-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
 - two fixes for hangs in the umount sequence where threads depend on each other and the work must be finished in the right order
 - in zoned mode, wait for flushing all block group metadata IO before finishing the zone

* tag 'for-6.0-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: zoned: wait for extent buffer IOs before finishing a zone
  btrfs: fix hang during unmount when stopping a space reclaim worker
  btrfs: fix hang during unmount when stopping block group reclaim worker
2 parents 84a3193 + 2dd7e7b commit 60891ec

File tree

2 files changed

+74
-8
lines changed

2 files changed

+74
-8
lines changed

fs/btrfs/disk-io.c

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4474,6 +4474,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
44744474

44754475
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
44764476

4477+
/*
4478+
* If we had UNFINISHED_DROPS we could still be processing them, so
4479+
* clear that bit and wake up relocation so it can stop.
4480+
* We must do this before stopping the block group reclaim task, because
4481+
* at btrfs_relocate_block_group() we wait for this bit, and after the
4482+
* wait we stop with -EINTR if btrfs_fs_closing() returns non-zero - we
4483+
* have just set BTRFS_FS_CLOSING_START, so btrfs_fs_closing() will
4484+
* return 1.
4485+
*/
4486+
btrfs_wake_unfinished_drop(fs_info);
4487+
44774488
/*
44784489
* We may have the reclaim task running and relocating a data block group,
44794490
* in which case it may create delayed iputs. So stop it before we park
@@ -4492,12 +4503,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
44924503
*/
44934504
kthread_park(fs_info->cleaner_kthread);
44944505

4495-
/*
4496-
* If we had UNFINISHED_DROPS we could still be processing them, so
4497-
* clear that bit and wake up relocation so it can stop.
4498-
*/
4499-
btrfs_wake_unfinished_drop(fs_info);
4500-
45014506
/* wait for the qgroup rescan worker to stop */
45024507
btrfs_qgroup_wait_for_completion(fs_info, false);
45034508

@@ -4520,6 +4525,31 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
45204525
/* clear out the rbtree of defraggable inodes */
45214526
btrfs_cleanup_defrag_inodes(fs_info);
45224527

4528+
/*
4529+
* After we parked the cleaner kthread, ordered extents may have
4530+
* completed and created new delayed iputs. If one of the async reclaim
4531+
* tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
4532+
* can hang forever trying to stop it, because if a delayed iput is
4533+
* added after it ran btrfs_run_delayed_iputs() and before it called
4534+
* btrfs_wait_on_delayed_iputs(), it will hang forever since there is
4535+
* no one else to run iputs.
4536+
*
4537+
* So wait for all ongoing ordered extents to complete and then run
4538+
* delayed iputs. This works because once we reach this point no one
4539+
* can either create new ordered extents or create delayed iputs
4540+
* through some other means.
4541+
*
4542+
* Also note that btrfs_wait_ordered_roots() is not safe here, because
4543+
* it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
4544+
* but the delayed iput for the respective inode is made only when doing
4545+
* the final btrfs_put_ordered_extent() (which must happen at
4546+
* btrfs_finish_ordered_io() when we are unmounting).
4547+
*/
4548+
btrfs_flush_workqueue(fs_info->endio_write_workers);
4549+
/* Ordered extents for free space inodes. */
4550+
btrfs_flush_workqueue(fs_info->endio_freespace_worker);
4551+
btrfs_run_delayed_iputs(fs_info);
4552+
45234553
cancel_work_sync(&fs_info->async_reclaim_work);
45244554
cancel_work_sync(&fs_info->async_data_reclaim_work);
45254555
cancel_work_sync(&fs_info->preempt_reclaim_work);

fs/btrfs/zoned.c

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1918,10 +1918,44 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
19181918
return ret;
19191919
}
19201920

1921+
static void wait_eb_writebacks(struct btrfs_block_group *block_group)
1922+
{
1923+
struct btrfs_fs_info *fs_info = block_group->fs_info;
1924+
const u64 end = block_group->start + block_group->length;
1925+
struct radix_tree_iter iter;
1926+
struct extent_buffer *eb;
1927+
void __rcu **slot;
1928+
1929+
rcu_read_lock();
1930+
radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter,
1931+
block_group->start >> fs_info->sectorsize_bits) {
1932+
eb = radix_tree_deref_slot(slot);
1933+
if (!eb)
1934+
continue;
1935+
if (radix_tree_deref_retry(eb)) {
1936+
slot = radix_tree_iter_retry(&iter);
1937+
continue;
1938+
}
1939+
1940+
if (eb->start < block_group->start)
1941+
continue;
1942+
if (eb->start >= end)
1943+
break;
1944+
1945+
slot = radix_tree_iter_resume(slot, &iter);
1946+
rcu_read_unlock();
1947+
wait_on_extent_buffer_writeback(eb);
1948+
rcu_read_lock();
1949+
}
1950+
rcu_read_unlock();
1951+
}
1952+
19211953
static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
19221954
{
19231955
struct btrfs_fs_info *fs_info = block_group->fs_info;
19241956
struct map_lookup *map;
1957+
const bool is_metadata = (block_group->flags &
1958+
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
19251959
int ret = 0;
19261960
int i;
19271961

@@ -1932,8 +1966,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
19321966
}
19331967

19341968
/* Check if we have unwritten allocated space */
1935-
if ((block_group->flags &
1936-
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
1969+
if (is_metadata &&
19371970
block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) {
19381971
spin_unlock(&block_group->lock);
19391972
return -EAGAIN;
@@ -1958,6 +1991,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
19581991
/* No need to wait for NOCOW writers. Zoned mode does not allow that */
19591992
btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
19601993
block_group->length);
1994+
/* Wait for extent buffers to be written. */
1995+
if (is_metadata)
1996+
wait_eb_writebacks(block_group);
19611997

19621998
spin_lock(&block_group->lock);
19631999

0 commit comments

Comments
 (0)