Skip to content

Commit 276f98e

Browse files
committed
Merge tag 'block-6.14-20250228' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe: - Fix plugging for native zone writes - Fix segment limit settings for != 4K page size archs - Fix for slab names overflowing * tag 'block-6.14-20250228' of git://git.kernel.dk/linux: block: fix 'kmem_cache of name 'bio-108' already exists' block: Remove zone write plugs when handling native zone append writes block: make segment size limit workable for > 4K PAGE_SIZE
2 parents 3e5d15d + b654f7a commit 276f98e

File tree

6 files changed

+94
-17
lines changed

6 files changed

+94
-17
lines changed

block/bio.c

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -77,7 +77,7 @@ struct bio_slab {
7777
struct kmem_cache *slab;
7878
unsigned int slab_ref;
7979
unsigned int slab_size;
80-
char name[8];
80+
char name[12];
8181
};
8282
static DEFINE_MUTEX(bio_slab_lock);
8383
static DEFINE_XARRAY(bio_slabs);

block/blk-merge.c

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -329,7 +329,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
329329

330330
if (nsegs < lim->max_segments &&
331331
bytes + bv.bv_len <= max_bytes &&
332-
bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
332+
bv.bv_offset + bv.bv_len <= lim->min_segment_size) {
333333
nsegs++;
334334
bytes += bv.bv_len;
335335
} else {

block/blk-settings.c

Lines changed: 11 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -246,6 +246,7 @@ int blk_validate_limits(struct queue_limits *lim)
246246
{
247247
unsigned int max_hw_sectors;
248248
unsigned int logical_block_sectors;
249+
unsigned long seg_size;
249250
int err;
250251

251252
/*
@@ -303,7 +304,7 @@ int blk_validate_limits(struct queue_limits *lim)
303304
max_hw_sectors = min_not_zero(lim->max_hw_sectors,
304305
lim->max_dev_sectors);
305306
if (lim->max_user_sectors) {
306-
if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
307+
if (lim->max_user_sectors < BLK_MIN_SEGMENT_SIZE / SECTOR_SIZE)
307308
return -EINVAL;
308309
lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
309310
} else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) {
@@ -341,7 +342,7 @@ int blk_validate_limits(struct queue_limits *lim)
341342
*/
342343
if (!lim->seg_boundary_mask)
343344
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
344-
if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1))
345+
if (WARN_ON_ONCE(lim->seg_boundary_mask < BLK_MIN_SEGMENT_SIZE - 1))
345346
return -EINVAL;
346347

347348
/*
@@ -362,10 +363,17 @@ int blk_validate_limits(struct queue_limits *lim)
362363
*/
363364
if (!lim->max_segment_size)
364365
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
365-
if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
366+
if (WARN_ON_ONCE(lim->max_segment_size < BLK_MIN_SEGMENT_SIZE))
366367
return -EINVAL;
367368
}
368369

370+
/* setup min segment size for building new segment in fast path */
371+
if (lim->seg_boundary_mask > lim->max_segment_size - 1)
372+
seg_size = lim->max_segment_size;
373+
else
374+
seg_size = lim->seg_boundary_mask + 1;
375+
lim->min_segment_size = min_t(unsigned int, seg_size, PAGE_SIZE);
376+
369377
/*
370378
* We require drivers to at least do logical block aligned I/O, but
371379
* historically could not check for that due to the separate calls

block/blk-zoned.c

Lines changed: 69 additions & 7 deletions
Original file line number · Diff line number · Diff line change
@@ -410,13 +410,14 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
410410
}
411411
}
412412
hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
413+
atomic_inc(&disk->nr_zone_wplugs);
413414
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
414415

415416
return true;
416417
}
417418

418-
static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
419-
sector_t sector)
419+
static struct blk_zone_wplug *disk_get_hashed_zone_wplug(struct gendisk *disk,
420+
sector_t sector)
420421
{
421422
unsigned int zno = disk_zone_no(disk, sector);
422423
unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits);
@@ -437,6 +438,15 @@ static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
437438
return NULL;
438439
}
439440

441+
static inline struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
442+
sector_t sector)
443+
{
444+
if (!atomic_read(&disk->nr_zone_wplugs))
445+
return NULL;
446+
447+
return disk_get_hashed_zone_wplug(disk, sector);
448+
}
449+
440450
static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head)
441451
{
442452
struct blk_zone_wplug *zwplug =
@@ -503,6 +513,7 @@ static void disk_remove_zone_wplug(struct gendisk *disk,
503513
zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED;
504514
spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
505515
hlist_del_init_rcu(&zwplug->node);
516+
atomic_dec(&disk->nr_zone_wplugs);
506517
spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
507518
disk_put_zone_wplug(zwplug);
508519
}
@@ -593,6 +604,11 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
593604
{
594605
struct bio *bio;
595606

607+
if (bio_list_empty(&zwplug->bio_list))
608+
return;
609+
610+
pr_warn_ratelimited("%s: zone %u: Aborting plugged BIOs\n",
611+
zwplug->disk->disk_name, zwplug->zone_no);
596612
while ((bio = bio_list_pop(&zwplug->bio_list)))
597613
blk_zone_wplug_bio_io_error(zwplug, bio);
598614
}
@@ -1040,6 +1056,47 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
10401056
return true;
10411057
}
10421058

1059+
static void blk_zone_wplug_handle_native_zone_append(struct bio *bio)
1060+
{
1061+
struct gendisk *disk = bio->bi_bdev->bd_disk;
1062+
struct blk_zone_wplug *zwplug;
1063+
unsigned long flags;
1064+
1065+
/*
1066+
* We have native support for zone append operations, so we are not
1067+
* going to handle @bio through plugging. However, we may already have a
1068+
* zone write plug for the target zone if that zone was previously
1069+
* partially written using regular writes. In such case, we risk leaving
1070+
* the plug in the disk hash table if the zone is fully written using
1071+
* zone append operations. Avoid this by removing the zone write plug.
1072+
*/
1073+
zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
1074+
if (likely(!zwplug))
1075+
return;
1076+
1077+
spin_lock_irqsave(&zwplug->lock, flags);
1078+
1079+
/*
1080+
* We are about to remove the zone write plug. But if the user
1081+
* (mistakenly) has issued regular writes together with native zone
1082+
append, we must abort the writes as otherwise the plugged BIOs would
1083+
* not be executed by the plug BIO work as disk_get_zone_wplug() will
1084+
* return NULL after the plug is removed. Aborting the plugged write
1085+
* BIOs is consistent with the fact that these writes will most likely
1086+
fail anyway as there are no ordering guarantees between zone append
1087+
* operations and regular write operations.
1088+
*/
1089+
if (!bio_list_empty(&zwplug->bio_list)) {
1090+
pr_warn_ratelimited("%s: zone %u: Invalid mix of zone append and regular writes\n",
1091+
disk->disk_name, zwplug->zone_no);
1092+
disk_zone_wplug_abort(zwplug);
1093+
}
1094+
disk_remove_zone_wplug(disk, zwplug);
1095+
spin_unlock_irqrestore(&zwplug->lock, flags);
1096+
1097+
disk_put_zone_wplug(zwplug);
1098+
}
1099+
10431100
/**
10441101
* blk_zone_plug_bio - Handle a zone write BIO with zone write plugging
10451102
* @bio: The BIO being submitted
@@ -1096,8 +1153,10 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
10961153
*/
10971154
switch (bio_op(bio)) {
10981155
case REQ_OP_ZONE_APPEND:
1099-
if (!bdev_emulates_zone_append(bdev))
1156+
if (!bdev_emulates_zone_append(bdev)) {
1157+
blk_zone_wplug_handle_native_zone_append(bio);
11001158
return false;
1159+
}
11011160
fallthrough;
11021161
case REQ_OP_WRITE:
11031162
case REQ_OP_WRITE_ZEROES:
@@ -1284,6 +1343,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
12841343
{
12851344
unsigned int i;
12861345

1346+
atomic_set(&disk->nr_zone_wplugs, 0);
12871347
disk->zone_wplugs_hash_bits =
12881348
min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS);
12891349

@@ -1338,6 +1398,7 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
13381398
}
13391399
}
13401400

1401+
WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs));
13411402
kfree(disk->zone_wplugs_hash);
13421403
disk->zone_wplugs_hash = NULL;
13431404
disk->zone_wplugs_hash_bits = 0;
@@ -1550,11 +1611,12 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
15501611
}
15511612

15521613
/*
1553-
* We need to track the write pointer of all zones that are not
1554-
* empty nor full. So make sure we have a zone write plug for
1555-
* such zone if the device has a zone write plug hash table.
1614+
* If the device needs zone append emulation, we need to track the
1615+
* write pointer of all zones that are not empty nor full. So make sure
1616+
* we have a zone write plug for such zone if the device has a zone
1617+
* write plug hash table.
15561618
*/
1557-
if (!disk->zone_wplugs_hash)
1619+
if (!queue_emulates_zone_append(disk->queue) || !disk->zone_wplugs_hash)
15581620
return 0;
15591621

15601622
disk_zone_wplug_sync_wp_offset(disk, zone);

block/blk.h

Lines changed: 7 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -14,6 +14,7 @@
1414
struct elevator_type;
1515

1616
#define BLK_DEV_MAX_SECTORS (LLONG_MAX >> 9)
17+
#define BLK_MIN_SEGMENT_SIZE 4096
1718

1819
/* Max future timer expiry for timeouts */
1920
#define BLK_MAX_TIMEOUT (5 * HZ)
@@ -358,8 +359,12 @@ struct bio *bio_split_zone_append(struct bio *bio,
358359
static inline bool bio_may_need_split(struct bio *bio,
359360
const struct queue_limits *lim)
360361
{
361-
return lim->chunk_sectors || bio->bi_vcnt != 1 ||
362-
bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
362+
if (lim->chunk_sectors)
363+
return true;
364+
if (bio->bi_vcnt != 1)
365+
return true;
366+
return bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset >
367+
lim->min_segment_size;
363368
}
364369

365370
/**

include/linux/blkdev.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,10 +196,11 @@ struct gendisk {
196196
unsigned int zone_capacity;
197197
unsigned int last_zone_capacity;
198198
unsigned long __rcu *conv_zones_bitmap;
199-
unsigned int zone_wplugs_hash_bits;
200-
spinlock_t zone_wplugs_lock;
199+
unsigned int zone_wplugs_hash_bits;
200+
atomic_t nr_zone_wplugs;
201+
spinlock_t zone_wplugs_lock;
201202
struct mempool_s *zone_wplugs_pool;
202-
struct hlist_head *zone_wplugs_hash;
203+
struct hlist_head *zone_wplugs_hash;
203204
struct workqueue_struct *zone_wplugs_wq;
204205
#endif /* CONFIG_BLK_DEV_ZONED */
205206

@@ -367,6 +368,7 @@ struct queue_limits {
367368
unsigned int max_sectors;
368369
unsigned int max_user_sectors;
369370
unsigned int max_segment_size;
371+
unsigned int min_segment_size;
370372
unsigned int physical_block_size;
371373
unsigned int logical_block_size;
372374
unsigned int alignment_offset;

0 commit comments

Comments (0)