Skip to content

Commit fd574a2

Browse files
committed
Merge tag 'for-5.18-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
- direct IO fixes:
  - restore passing file offset to correctly calculate checksums when repairing on read and bio split happens
  - use correct bio when submitting IO on zoned filesystem
- zoned mode fixes:
  - fix selection of device to correctly calculate device capabilities when allocating a new bio
  - use a dedicated lock for exclusion during relocation
  - fix leaked plug after failure syncing log
- fix assertion during scrub and relocation

* tag 'for-5.18-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: zoned: use dedicated lock for data relocation
  btrfs: fix assertion failure during scrub due to block group reallocation
  btrfs: fix direct I/O writes for split bios on zoned devices
  btrfs: fix direct I/O read repair for split bios
  btrfs: fix and document the zoned device choice in alloc_new_bio
  btrfs: fix leaked plug after failure syncing log on zoned filesystems
2 parents d615b54 + 5f0addf commit fd574a2

File tree

9 files changed

+76
-29
lines changed

9 files changed

+76
-29
lines changed

fs/btrfs/ctree.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1060,6 +1060,7 @@ struct btrfs_fs_info {
10601060
*/
10611061
spinlock_t relocation_bg_lock;
10621062
u64 data_reloc_bg;
1063+
struct mutex zoned_data_reloc_io_lock;
10631064

10641065
u64 nr_global_roots;
10651066

fs/btrfs/dev-replace.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -734,7 +734,12 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
734734

735735
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
736736

737-
/* Commit dev_replace state and reserve 1 item for it. */
737+
/*
738+
* Commit dev_replace state and reserve 1 item for it.
739+
* This is crucial to ensure we won't miss copying extents for new block
740+
* groups that are allocated after we started the device replace, and
741+
* must be done after setting up the device replace state.
742+
*/
738743
trans = btrfs_start_transaction(root, 1);
739744
if (IS_ERR(trans)) {
740745
ret = PTR_ERR(trans);

fs/btrfs/disk-io.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3157,6 +3157,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
31573157
mutex_init(&fs_info->reloc_mutex);
31583158
mutex_init(&fs_info->delalloc_root_mutex);
31593159
mutex_init(&fs_info->zoned_meta_io_lock);
3160+
mutex_init(&fs_info->zoned_data_reloc_io_lock);
31603161
seqlock_init(&fs_info->profiles_lock);
31613162

31623163
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);

fs/btrfs/extent_io.c

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2658,6 +2658,7 @@ int btrfs_repair_one_sector(struct inode *inode,
26582658

26592659
repair_bio = btrfs_bio_alloc(1);
26602660
repair_bbio = btrfs_bio(repair_bio);
2661+
repair_bbio->file_offset = start;
26612662
repair_bio->bi_opf = REQ_OP_READ;
26622663
repair_bio->bi_end_io = failed_bio->bi_end_io;
26632664
repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
@@ -3333,24 +3334,37 @@ static int alloc_new_bio(struct btrfs_inode *inode,
33333334
ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
33343335
if (ret < 0)
33353336
goto error;
3336-
if (wbc) {
3337-
struct block_device *bdev;
33383337

3339-
bdev = fs_info->fs_devices->latest_dev->bdev;
3340-
bio_set_dev(bio, bdev);
3341-
wbc_init_bio(wbc, bio);
3342-
}
3343-
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
3344-
struct btrfs_device *device;
3338+
if (wbc) {
3339+
/*
3340+
* For Zone append we need the correct block_device that we are
3341+
* going to write to set in the bio to be able to respect the
3342+
* hardware limitation. Look it up here:
3343+
*/
3344+
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
3345+
struct btrfs_device *dev;
3346+
3347+
dev = btrfs_zoned_get_device(fs_info, disk_bytenr,
3348+
fs_info->sectorsize);
3349+
if (IS_ERR(dev)) {
3350+
ret = PTR_ERR(dev);
3351+
goto error;
3352+
}
33453353

3346-
device = btrfs_zoned_get_device(fs_info, disk_bytenr,
3347-
fs_info->sectorsize);
3348-
if (IS_ERR(device)) {
3349-
ret = PTR_ERR(device);
3350-
goto error;
3354+
bio_set_dev(bio, dev->bdev);
3355+
} else {
3356+
/*
3357+
* Otherwise pick the last added device to support
3358+
* cgroup writeback. For multi-device file systems this
3359+
* means blk-cgroup policies have to always be set on the
3360+
* last added/replaced device. This is a bit odd but has
3361+
* been like that for a long time.
3362+
*/
3363+
bio_set_dev(bio, fs_info->fs_devices->latest_dev->bdev);
33513364
}
3352-
3353-
btrfs_bio(bio)->device = device;
3365+
wbc_init_bio(wbc, bio);
3366+
} else {
3367+
ASSERT(bio_op(bio) != REQ_OP_ZONE_APPEND);
33543368
}
33553369
return 0;
33563370
error:

fs/btrfs/inode.c

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7810,8 +7810,6 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
78107810
const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
78117811
struct bio_vec bvec;
78127812
struct bvec_iter iter;
7813-
const u64 orig_file_offset = dip->file_offset;
7814-
u64 start = orig_file_offset;
78157813
u32 bio_offset = 0;
78167814
blk_status_t err = BLK_STS_OK;
78177815

@@ -7821,6 +7819,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
78217819
nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
78227820
pgoff = bvec.bv_offset;
78237821
for (i = 0; i < nr_sectors; i++) {
7822+
u64 start = bbio->file_offset + bio_offset;
7823+
78247824
ASSERT(pgoff < PAGE_SIZE);
78257825
if (uptodate &&
78267826
(!csum || !check_data_csum(inode, bbio,
@@ -7833,17 +7833,13 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
78337833
} else {
78347834
int ret;
78357835

7836-
ASSERT((start - orig_file_offset) < UINT_MAX);
7837-
ret = btrfs_repair_one_sector(inode,
7838-
&bbio->bio,
7839-
start - orig_file_offset,
7840-
bvec.bv_page, pgoff,
7836+
ret = btrfs_repair_one_sector(inode, &bbio->bio,
7837+
bio_offset, bvec.bv_page, pgoff,
78417838
start, bbio->mirror_num,
78427839
submit_dio_repair_bio);
78437840
if (ret)
78447841
err = errno_to_blk_status(ret);
78457842
}
7846-
start += sectorsize;
78477843
ASSERT(bio_offset + sectorsize > bio_offset);
78487844
bio_offset += sectorsize;
78497845
pgoff += sectorsize;
@@ -7870,6 +7866,7 @@ static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
78707866
static void btrfs_end_dio_bio(struct bio *bio)
78717867
{
78727868
struct btrfs_dio_private *dip = bio->bi_private;
7869+
struct btrfs_bio *bbio = btrfs_bio(bio);
78737870
blk_status_t err = bio->bi_status;
78747871

78757872
if (err)
@@ -7880,12 +7877,12 @@ static void btrfs_end_dio_bio(struct bio *bio)
78807877
bio->bi_iter.bi_size, err);
78817878

78827879
if (bio_op(bio) == REQ_OP_READ)
7883-
err = btrfs_check_read_dio_bio(dip, btrfs_bio(bio), !err);
7880+
err = btrfs_check_read_dio_bio(dip, bbio, !err);
78847881

78857882
if (err)
78867883
dip->dio_bio->bi_status = err;
78877884

7888-
btrfs_record_physical_zoned(dip->inode, dip->file_offset, bio);
7885+
btrfs_record_physical_zoned(dip->inode, bbio->file_offset, bio);
78897886

78907887
bio_put(bio);
78917888
btrfs_dio_private_put(dip);
@@ -8046,6 +8043,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
80468043
bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len);
80478044
bio->bi_private = dip;
80488045
bio->bi_end_io = btrfs_end_dio_bio;
8046+
btrfs_bio(bio)->file_offset = file_offset;
80498047

80508048
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
80518049
status = extract_ordered_extent(BTRFS_I(inode), bio,

fs/btrfs/scrub.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3699,6 +3699,31 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
36993699
if (!cache)
37003700
goto skip;
37013701

3702+
ASSERT(cache->start <= chunk_offset);
3703+
/*
3704+
* We are using the commit root to search for device extents, so
3705+
* that means we could have found a device extent item from a
3706+
* block group that was deleted in the current transaction. The
3707+
* logical start offset of the deleted block group, stored at
3708+
* @chunk_offset, might be part of the logical address range of
3709+
* a new block group (which uses different physical extents).
3710+
* In this case btrfs_lookup_block_group() has returned the new
3711+
* block group, and its start address is less than @chunk_offset.
3712+
*
3713+
* We skip such new block groups, because it's pointless to
3714+
* process them, as we won't find their extents because we search
3715+
* for them using the commit root of the extent tree. For a device
3716+
* replace it's also fine to skip it, we won't miss copying them
3717+
* to the target device because we have the write duplication
3718+
* setup through the regular write path (by btrfs_map_block()),
3719+
* and we have committed a transaction when we started the device
3720+
* replace, right after setting up the device replace state.
3721+
*/
3722+
if (cache->start < chunk_offset) {
3723+
btrfs_put_block_group(cache);
3724+
goto skip;
3725+
}
3726+
37023727
if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
37033728
spin_lock(&cache->lock);
37043729
if (!cache->to_copy) {
@@ -3822,7 +3847,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
38223847
dev_replace->item_needs_writeback = 1;
38233848
up_write(&dev_replace->rwsem);
38243849

3825-
ASSERT(cache->start == chunk_offset);
38263850
ret = scrub_chunk(sctx, cache, scrub_dev, found_key.offset,
38273851
dev_extent_len);
38283852

fs/btrfs/tree-log.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3188,6 +3188,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
31883188
ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
31893189
if (ret) {
31903190
mutex_unlock(&fs_info->tree_root->log_mutex);
3191+
blk_finish_plug(&plug);
31913192
goto out;
31923193
}
31933194
}

fs/btrfs/volumes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,9 @@ struct btrfs_fs_devices {
328328
struct btrfs_bio {
329329
unsigned int mirror_num;
330330

331+
/* for direct I/O */
332+
u64 file_offset;
333+
331334
/* @device is for stripe IO submission. */
332335
struct btrfs_device *device;
333336
u8 *csum;

fs/btrfs/zoned.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,15 +359,15 @@ static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode)
359359
struct btrfs_root *root = inode->root;
360360

361361
if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
362-
btrfs_inode_lock(&inode->vfs_inode, 0);
362+
mutex_lock(&root->fs_info->zoned_data_reloc_io_lock);
363363
}
364364

365365
static inline void btrfs_zoned_data_reloc_unlock(struct btrfs_inode *inode)
366366
{
367367
struct btrfs_root *root = inode->root;
368368

369369
if (btrfs_is_data_reloc_root(root) && btrfs_is_zoned(root->fs_info))
370-
btrfs_inode_unlock(&inode->vfs_inode, 0);
370+
mutex_unlock(&root->fs_info->zoned_data_reloc_io_lock);
371371
}
372372

373373
#endif

0 commit comments

Comments
 (0)