Commit 9b45094

Merge tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
 "A few more fixes to zoned mode and one regression fix for chunk
  limit:

   - Zoned mode fixes:
       - fix how wait/wake up is done when finishing zone
       - fix zone append limit in emulated mode
       - fix mount on devices with conventional zones

   - fix regression, user settable data chunk limit got accidentally
     lowered and causes allocation problems on some profiles (raid0,
     raid1)"

* tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix the max chunk size and stripe length calculation
  btrfs: zoned: fix mounting with conventional zones
  btrfs: zoned: set pseudo max append zone limit in zone emulation mode
  btrfs: zoned: fix API misuse of zone finish waiting
2 parents 725f3f3 + 5da431b commit 9b45094

6 files changed, +60 -54 lines changed

fs/btrfs/ctree.h

Lines changed: 0 additions & 2 deletions
@@ -1088,8 +1088,6 @@ struct btrfs_fs_info {
 
 	spinlock_t zone_active_bgs_lock;
 	struct list_head zone_active_bgs;
-	/* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */
-	wait_queue_head_t zone_finish_wait;
 
 	/* Updates are not protected by any lock */
 	struct btrfs_commit_stats commit_stats;

fs/btrfs/disk-io.c

Lines changed: 0 additions & 1 deletion
@@ -3068,7 +3068,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	init_waitqueue_head(&fs_info->transaction_blocked_wait);
 	init_waitqueue_head(&fs_info->async_submit_wait);
 	init_waitqueue_head(&fs_info->delayed_iputs_wait);
-	init_waitqueue_head(&fs_info->zone_finish_wait);
 
 	/* Usable values until the real ones are cached from the superblock */
 	fs_info->nodesize = 4096;

fs/btrfs/inode.c

Lines changed: 3 additions & 4 deletions
@@ -1644,10 +1644,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 			done_offset = end;
 
 		if (done_offset == start) {
-			struct btrfs_fs_info *info = inode->root->fs_info;
-
-			wait_var_event(&info->zone_finish_wait,
-				       !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags));
+			wait_on_bit_io(&inode->root->fs_info->flags,
+				       BTRFS_FS_NEED_ZONE_FINISH,
+				       TASK_UNINTERRUPTIBLE);
 			continue;
 		}
 
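The old pairing was broken because, schematically, wait_var_event() parks the task on a hashed waitqueue derived from the variable's address, while wake_up_all() targeted the wait_queue_head_t itself, so waiter and waker never met on the same queue and the waiter could sleep forever. The fix keys both sides on the BTRFS_FS_NEED_ZONE_FINISH bit in fs_info->flags. A minimal sketch of the corrected pairing (the wrapper function names are hypothetical; the calls inside match the diff):

#include <linux/sched.h>
#include <linux/wait_bit.h>

/* Waiter side, as in run_delalloc_zoned() above: sleep (accounted as
 * an IO wait) until BTRFS_FS_NEED_ZONE_FINISH is cleared. */
static void example_wait_zone_finish(struct btrfs_fs_info *fs_info)
{
	wait_on_bit_io(&fs_info->flags, BTRFS_FS_NEED_ZONE_FINISH,
		       TASK_UNINTERRUPTIBLE);
}

/* Waker side, as in do_zone_finish() further down: clear the bit and
 * wake everyone sleeping on the same (word, bit) pair. */
static void example_zone_finish_done(struct btrfs_fs_info *fs_info)
{
	clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
}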

fs/btrfs/space-info.c

Lines changed: 1 addition & 1 deletion
@@ -199,7 +199,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
 	ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
 
 	if (flags & BTRFS_BLOCK_GROUP_DATA)
-		return SZ_1G;
+		return BTRFS_MAX_DATA_CHUNK_SIZE;
 	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
 		return SZ_32M;
 	else
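BTRFS_MAX_DATA_CHUNK_SIZE is 10 GiB (fs/btrfs/volumes.h), so returning SZ_1G here had silently cut the user-settable data chunk cap from 10G to 1G, and since the same value also fed the per-device stripe limit, striped profiles shrank with it. A rough worked example (the device count is illustrative, not from the commit):

#include <linux/sizes.h>
#include <linux/types.h>

/* Illustrative only: a 4-device raid0 data chunk, where
 * chunk_size = stripe_size * data_stripes. */
#define EXAMPLE_DATA_STRIPES	4ULL

/* Regressed: the whole chunk was capped at 1G, squeezing each
 * per-device stripe down to 256M. */
static const u64 regressed_stripe = SZ_1G / EXAMPLE_DATA_STRIPES;

/* Fixed: the chunk cap is back to 10G and the 1G limit applies per
 * stripe (see the fs/btrfs/volumes.c hunk below), so the same chunk
 * can grow to four 1G stripes = 4G. */
static const u64 fixed_chunk = SZ_1G * EXAMPLE_DATA_STRIPES;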

fs/btrfs/volumes.c

Lines changed: 3 additions & 0 deletions
@@ -5267,6 +5267,9 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
 				       ctl->stripe_size);
 	}
 
+	/* Stripe size should not go beyond 1G. */
+	ctl->stripe_size = min_t(u64, ctl->stripe_size, SZ_1G);
+
 	/* Align to BTRFS_STRIPE_LEN */
 	ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
 	ctl->chunk_size = ctl->stripe_size * data_stripes;

fs/btrfs/zoned.c

Lines changed: 53 additions & 46 deletions
@@ -421,10 +421,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
 	 * since btrfs adds the pages one by one to a bio, and btrfs cannot
 	 * increase the metadata reservation even if it increases the number of
 	 * extents, it is safe to stick with the limit.
+	 *
+	 * With the zoned emulation, we can have non-zoned device on the zoned
+	 * mode. In this case, we don't have a valid max zone append size. So,
+	 * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
 	 */
-	zone_info->max_zone_append_size =
-		min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
-		      (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+	if (bdev_is_zoned(bdev)) {
+		zone_info->max_zone_append_size = min_t(u64,
+			(u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
+			(u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+	} else {
+		zone_info->max_zone_append_size =
+			(u64)bdev_max_segments(bdev) << PAGE_SHIFT;
+	}
 	if (!IS_ALIGNED(nr_sectors, zone_sectors))
 		zone_info->nr_zones++;
 
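On a regular (non-zoned) device running in zone emulation mode, bdev_max_zone_append_sectors() reports 0, so the old unconditional min_t() collapsed max_zone_append_size to zero, along with the extent size limits derived from it. A condensed sketch of the fixed selection logic (same block-layer helpers as the hunk; the wrapper function is hypothetical):

#include <linux/blkdev.h>

/* Hypothetical wrapper restating the branch above. */
static u64 example_max_zone_append_size(struct block_device *bdev)
{
	/* Always a valid upper bound: one page per bio segment. */
	u64 seg_limit = (u64)bdev_max_segments(bdev) << PAGE_SHIFT;

	/* Emulated zones on a regular device: no hardware zone-append
	 * limit exists, so use the pseudo limit alone. */
	if (!bdev_is_zoned(bdev))
		return seg_limit;

	/* Real zoned device: honor the hardware append limit too. */
	return min_t(u64,
		     (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
		     seg_limit);
}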

@@ -1178,7 +1187,7 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
  * offset.
  */
 static int calculate_alloc_pointer(struct btrfs_block_group *cache,
-				   u64 *offset_ret)
+				   u64 *offset_ret, bool new)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_root *root;
@@ -1188,6 +1197,21 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
 	int ret;
 	u64 length;
 
+	/*
+	 * Avoid tree lookups for a new block group, there's no use for it.
+	 * It must always be 0.
+	 *
+	 * Also, we have a lock chain of extent buffer lock -> chunk mutex.
+	 * For a new block group, this function is called from
+	 * btrfs_make_block_group() which is already taking the chunk mutex.
+	 * Thus, we cannot call calculate_alloc_pointer() which takes extent
+	 * buffer locks to avoid deadlock.
+	 */
+	if (new) {
+		*offset_ret = 0;
+		return 0;
+	}
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
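The comment's deadlock argument, laid out as the two call paths (a simplified sketch; only the function names appearing in this diff are real):

/*
 * Established lock order:  extent buffer lock  ->  chunk_mutex
 *
 * Block group creation already holds chunk_mutex:
 *   btrfs_make_block_group()                      [chunk_mutex held]
 *     -> btrfs_load_block_group_zone_info(cache, true)
 *       -> calculate_alloc_pointer(cache, &offset, true)
 *            returns 0 immediately and takes no extent buffer lock,
 *            so the inverted chunk_mutex -> extent buffer lock order
 *            is never attempted.
 *
 * Reading existing block groups at mount holds no chunk_mutex, so the
 * tree lookup (and its extent buffer locks) is safe on that path.
 */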
@@ -1323,6 +1347,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		else
 			num_conventional++;
 
+		/*
+		 * Consider a zone as active if we can allow any number of
+		 * active zones.
+		 */
+		if (!device->zone_info->max_active_zones)
+			__set_bit(i, active);
+
 		if (!is_sequential) {
 			alloc_offsets[i] = WP_CONVENTIONAL;
 			continue;
@@ -1389,45 +1420,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 			__set_bit(i, active);
 			break;
 		}
-
-		/*
-		 * Consider a zone as active if we can allow any number of
-		 * active zones.
-		 */
-		if (!device->zone_info->max_active_zones)
-			__set_bit(i, active);
 	}
 
 	if (num_sequential > 0)
 		cache->seq_zone = true;
 
 	if (num_conventional > 0) {
-		/*
-		 * Avoid calling calculate_alloc_pointer() for new BG. It
-		 * is no use for new BG. It must be always 0.
-		 *
-		 * Also, we have a lock chain of extent buffer lock ->
-		 * chunk mutex. For new BG, this function is called from
-		 * btrfs_make_block_group() which is already taking the
-		 * chunk mutex. Thus, we cannot call
-		 * calculate_alloc_pointer() which takes extent buffer
-		 * locks to avoid deadlock.
-		 */
-
 		/* Zone capacity is always zone size in emulation */
 		cache->zone_capacity = cache->length;
-		if (new) {
-			cache->alloc_offset = 0;
-			goto out;
-		}
-		ret = calculate_alloc_pointer(cache, &last_alloc);
-		if (ret || map->num_stripes == num_conventional) {
-			if (!ret)
-				cache->alloc_offset = last_alloc;
-			else
-				btrfs_err(fs_info,
+		ret = calculate_alloc_pointer(cache, &last_alloc, new);
+		if (ret) {
+			btrfs_err(fs_info,
 			"zoned: failed to determine allocation offset of bg %llu",
-					  cache->start);
+				  cache->start);
+			goto out;
+		} else if (map->num_stripes == num_conventional) {
+			cache->alloc_offset = last_alloc;
+			cache->zone_is_active = 1;
 			goto out;
 		}
 	}
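Forcing zone_is_active on for purely conventional block groups is what makes these setups mountable again: conventional zones have no active/open state a device could run out of, and newer metadata space accounting only counts block groups on the active list, so a fully-conventional block group that never got there starved the accounting. That rationale is summarized from the "fix mounting with conventional zones" patch; the predicate below is a hypothetical restatement, not kernel code:

/* Hypothetical restatement of when a zoned block group counts as
 * active after this fix. */
static bool example_bg_is_active(unsigned int max_active_zones,
				 unsigned int num_stripes,
				 unsigned int num_conventional)
{
	/* The device imposes no active-zone limit: every zone, and
	 * thus every block group, qualifies (the __set_bit() hunk
	 * earlier). */
	if (!max_active_zones)
		return true;

	/* All stripes sit on conventional zones: no write pointer, no
	 * active-zone slot consumed, treat it as always active. */
	return num_stripes == num_conventional;
}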
@@ -1495,13 +1504,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		goto out;
 	}
 
-	if (cache->zone_is_active) {
-		btrfs_get_block_group(cache);
-		spin_lock(&fs_info->zone_active_bgs_lock);
-		list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs);
-		spin_unlock(&fs_info->zone_active_bgs_lock);
-	}
-
 out:
 	if (cache->alloc_offset > fs_info->zone_size) {
 		btrfs_err(fs_info,
@@ -1526,10 +1528,16 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		ret = -EIO;
 	}
 
-	if (!ret)
+	if (!ret) {
 		cache->meta_write_pointer = cache->alloc_offset + cache->start;
-
-	if (ret) {
+		if (cache->zone_is_active) {
+			btrfs_get_block_group(cache);
+			spin_lock(&fs_info->zone_active_bgs_lock);
+			list_add_tail(&cache->active_bg_list,
+				      &fs_info->zone_active_bgs);
+			spin_unlock(&fs_info->zone_active_bgs_lock);
+		}
+	} else {
 		kfree(cache->physical_map);
 		cache->physical_map = NULL;
 	}
@@ -2007,8 +2015,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 	/* For active_bg_list */
 	btrfs_put_block_group(block_group);
 
-	clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
-	wake_up_all(&fs_info->zone_finish_wait);
+	clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
 
 	return 0;
 }
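clear_and_wake_up_bit() is the one-call replacement for the clear/wake pair, and it pairs with the wait_on_bit_io() waiter in the fs/btrfs/inode.c hunk above. At the time of this series it expands roughly to the following (paraphrased from include/linux/wait_bit.h; the function name here is illustrative):

#include <linux/wait_bit.h>

/* Paraphrase of clear_and_wake_up_bit(bit, word). */
static inline void example_clear_and_wake_up_bit(int bit, void *word)
{
	clear_bit_unlock(bit, word);	/* release-ordered bit clear */
	smp_mb__after_atomic();		/* order the clear vs. waiter checks */
	wake_up_bit(word, bit);		/* wake the hashed (word, bit) queue */
}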
