Skip to content

Commit 9feb1af

Browse files
committed
Merge tag 'for-linus-20191205' of git://git.kernel.dk/linux-block
Pull more block and io_uring updates from Jens Axboe:

"I wasn't expecting this to be so big, and if I was, I would have used separate branches for this. Going forward I'll be doing separate branches for the current tree, just like for the next kernel version tree. In any case, this contains:

- Series from Christoph that fixes an inherent race condition with zoned devices and revalidation.
- null_blk zone size fix (Damien)
- Fix for a regression in this merge window that caused busy spins by sending empty disk uevents (Eric)
- Fix for a regression in this merge window for bfq stats (Hou)
- Fix for io_uring creds allocation failure handling (me)
- io_uring -ERESTARTSYS send/recvmsg fix (me)
- Series that fixes the need for applications to retain state across async request punts for io_uring. This one is a bit larger than I would have hoped, but I think it's important we get this fixed for 5.5.
- connect(2) improvement for io_uring, handling EINPROGRESS instead of having applications needing to poll for it (me)
- Have io_uring use a hash for poll requests instead of an rbtree. This turned out to work much better in practice, so I think we should make the switch now. For some workloads, even with a fair amount of cancellations, the insertion sort is just too expensive. (me)
- Various little io_uring fixes (me, Jackie, Pavel, LimingWu)
- Fix for brd unaligned IO, and a warning for the future (Ming)
- Fix for a bio integrity data leak (Justin)
- bvec_iter_advance() improvement (Pavel)
- Xen blkback page unmap fix (SeongJae)

The major items in here are all well tested, and on the liburing side we continue to add regression and feature test cases. We're up to 50 topic cases now, each with anywhere from 1 to more than 10 cases in each"

* tag 'for-linus-20191205' of git://git.kernel.dk/linux-block: (33 commits)
  block: fix memleak of bio integrity data
  io_uring: fix a typo in a comment
  bfq-iosched: Ensure bio->bi_blkg is valid before using it
  io_uring: hook all linked requests via link_list
  io_uring: fix error handling in io_queue_link_head
  io_uring: use hash table for poll command lookups
  io-wq: clear node->next on list deletion
  io_uring: ensure deferred timeouts copy necessary data
  io_uring: allow IO_SQE_* flags on IORING_OP_TIMEOUT
  null_blk: remove unused variable warning on !CONFIG_BLK_DEV_ZONED
  brd: warn on un-aligned buffer
  brd: remove max_hw_sectors queue limit
  xen/blkback: Avoid unmapping unmapped grant pages
  io_uring: handle connect -EINPROGRESS like -EAGAIN
  block: set the zone size in blk_revalidate_disk_zones atomically
  block: don't handle bio based drivers in blk_revalidate_disk_zones
  block: allocate the zone bitmaps lazily
  block: replace seq_zones_bitmap with conv_zones_bitmap
  block: simplify blkdev_nr_zones
  block: remove the empty line at the end of blk-zoned.c
  ...
2 parents 0aecba6 + 8539429 commit 9feb1af

File tree

21 files changed

+672
-406
lines changed

21 files changed

+672
-406
lines changed

block/bfq-cgroup.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,9 @@ void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
351351
{
352352
struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);
353353

354+
if (!bfqg)
355+
return;
356+
354357
blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
355358
blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
356359
}

block/bio-integrity.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ EXPORT_SYMBOL(bio_integrity_alloc);
8787
* Description: Used to free the integrity portion of a bio. Usually
8888
* called from bio_free().
8989
*/
90-
static void bio_integrity_free(struct bio *bio)
90+
void bio_integrity_free(struct bio *bio)
9191
{
9292
struct bio_integrity_payload *bip = bio_integrity(bio);
9393
struct bio_set *bs = bio->bi_pool;

block/bio.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,9 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
233233
void bio_uninit(struct bio *bio)
234234
{
235235
bio_disassociate_blkg(bio);
236+
237+
if (bio_integrity(bio))
238+
bio_integrity_free(bio);
236239
}
237240
EXPORT_SYMBOL(bio_uninit);
238241

block/blk-zoned.c

Lines changed: 69 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -70,30 +70,20 @@ void __blk_req_zone_write_unlock(struct request *rq)
7070
}
7171
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
7272

73-
static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
74-
sector_t nr_sectors)
75-
{
76-
sector_t zone_sectors = blk_queue_zone_sectors(q);
77-
78-
return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
79-
}
80-
8173
/**
8274
* blkdev_nr_zones - Get number of zones
83-
* @bdev: Target block device
75+
* @disk: Target gendisk
8476
*
85-
* Description:
86-
* Return the total number of zones of a zoned block device.
87-
* For a regular block device, the number of zones is always 0.
77+
* Return the total number of zones of a zoned block device. For a block
78+
* device without zone capabilities, the number of zones is always 0.
8879
*/
89-
unsigned int blkdev_nr_zones(struct block_device *bdev)
80+
unsigned int blkdev_nr_zones(struct gendisk *disk)
9081
{
91-
struct request_queue *q = bdev_get_queue(bdev);
82+
sector_t zone_sectors = blk_queue_zone_sectors(disk->queue);
9283

93-
if (!blk_queue_is_zoned(q))
84+
if (!blk_queue_is_zoned(disk->queue))
9485
return 0;
95-
96-
return __blkdev_nr_zones(q, get_capacity(bdev->bd_disk));
86+
return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors);
9787
}
9888
EXPORT_SYMBOL_GPL(blkdev_nr_zones);
9989

@@ -342,16 +332,18 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node,
342332

343333
void blk_queue_free_zone_bitmaps(struct request_queue *q)
344334
{
345-
kfree(q->seq_zones_bitmap);
346-
q->seq_zones_bitmap = NULL;
335+
kfree(q->conv_zones_bitmap);
336+
q->conv_zones_bitmap = NULL;
347337
kfree(q->seq_zones_wlock);
348338
q->seq_zones_wlock = NULL;
349339
}
350340

351341
struct blk_revalidate_zone_args {
352342
struct gendisk *disk;
353-
unsigned long *seq_zones_bitmap;
343+
unsigned long *conv_zones_bitmap;
354344
unsigned long *seq_zones_wlock;
345+
unsigned int nr_zones;
346+
sector_t zone_sectors;
355347
sector_t sector;
356348
};
357349

@@ -364,25 +356,33 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
364356
struct blk_revalidate_zone_args *args = data;
365357
struct gendisk *disk = args->disk;
366358
struct request_queue *q = disk->queue;
367-
sector_t zone_sectors = blk_queue_zone_sectors(q);
368359
sector_t capacity = get_capacity(disk);
369360

370361
/*
371362
* All zones must have the same size, with the exception of an eventual
372363
* smaller last zone.
373364
*/
374-
if (zone->start + zone_sectors < capacity &&
375-
zone->len != zone_sectors) {
376-
pr_warn("%s: Invalid zoned device with non constant zone size\n",
377-
disk->disk_name);
378-
return false;
379-
}
365+
if (zone->start == 0) {
366+
if (zone->len == 0 || !is_power_of_2(zone->len)) {
367+
pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
368+
disk->disk_name, zone->len);
369+
return -ENODEV;
370+
}
380371

381-
if (zone->start + zone->len >= capacity &&
382-
zone->len > zone_sectors) {
383-
pr_warn("%s: Invalid zoned device with larger last zone size\n",
384-
disk->disk_name);
385-
return -ENODEV;
372+
args->zone_sectors = zone->len;
373+
args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
374+
} else if (zone->start + args->zone_sectors < capacity) {
375+
if (zone->len != args->zone_sectors) {
376+
pr_warn("%s: Invalid zoned device with non constant zone size\n",
377+
disk->disk_name);
378+
return -ENODEV;
379+
}
380+
} else {
381+
if (zone->len > args->zone_sectors) {
382+
pr_warn("%s: Invalid zoned device with larger last zone size\n",
383+
disk->disk_name);
384+
return -ENODEV;
385+
}
386386
}
387387

388388
/* Check for holes in the zone report */
@@ -395,87 +395,77 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
395395
/* Check zone type */
396396
switch (zone->type) {
397397
case BLK_ZONE_TYPE_CONVENTIONAL:
398+
if (!args->conv_zones_bitmap) {
399+
args->conv_zones_bitmap =
400+
blk_alloc_zone_bitmap(q->node, args->nr_zones);
401+
if (!args->conv_zones_bitmap)
402+
return -ENOMEM;
403+
}
404+
set_bit(idx, args->conv_zones_bitmap);
405+
break;
398406
case BLK_ZONE_TYPE_SEQWRITE_REQ:
399407
case BLK_ZONE_TYPE_SEQWRITE_PREF:
408+
if (!args->seq_zones_wlock) {
409+
args->seq_zones_wlock =
410+
blk_alloc_zone_bitmap(q->node, args->nr_zones);
411+
if (!args->seq_zones_wlock)
412+
return -ENOMEM;
413+
}
400414
break;
401415
default:
402416
pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
403417
disk->disk_name, (int)zone->type, zone->start);
404418
return -ENODEV;
405419
}
406420

407-
if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
408-
set_bit(idx, args->seq_zones_bitmap);
409-
410421
args->sector += zone->len;
411422
return 0;
412423
}
413424

414-
static int blk_update_zone_info(struct gendisk *disk, unsigned int nr_zones,
415-
struct blk_revalidate_zone_args *args)
416-
{
417-
/*
418-
* Ensure that all memory allocations in this context are done as
419-
* if GFP_NOIO was specified.
420-
*/
421-
unsigned int noio_flag = memalloc_noio_save();
422-
struct request_queue *q = disk->queue;
423-
int ret;
424-
425-
args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
426-
if (!args->seq_zones_wlock)
427-
return -ENOMEM;
428-
args->seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
429-
if (!args->seq_zones_bitmap)
430-
return -ENOMEM;
431-
432-
ret = disk->fops->report_zones(disk, 0, nr_zones,
433-
blk_revalidate_zone_cb, args);
434-
memalloc_noio_restore(noio_flag);
435-
return ret;
436-
}
437-
438425
/**
439426
* blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
440427
* @disk: Target disk
441428
*
442429
* Helper function for low-level device drivers to (re) allocate and initialize
443430
* a disk request queue's zone bitmaps. This function should normally be called
444-
* within the disk ->revalidate method. For BIO based queues, no zone bitmap
445-
* is allocated.
431+
* within the disk ->revalidate method for blk-mq based drivers. For BIO based
432+
* drivers only q->nr_zones needs to be updated so that the sysfs exposed value
433+
* is correct.
446434
*/
447435
int blk_revalidate_disk_zones(struct gendisk *disk)
448436
{
449437
struct request_queue *q = disk->queue;
450-
unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk));
451-
struct blk_revalidate_zone_args args = { .disk = disk };
452-
int ret = 0;
438+
struct blk_revalidate_zone_args args = {
439+
.disk = disk,
440+
};
441+
unsigned int noio_flag;
442+
int ret;
453443

454444
if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
455445
return -EIO;
446+
if (WARN_ON_ONCE(!queue_is_mq(q)))
447+
return -EIO;
456448

457449
/*
458-
* BIO based queues do not use a scheduler so only q->nr_zones
459-
* needs to be updated so that the sysfs exposed value is correct.
450+
* Ensure that all memory allocations in this context are done as if
451+
* GFP_NOIO was specified.
460452
*/
461-
if (!queue_is_mq(q)) {
462-
q->nr_zones = nr_zones;
463-
return 0;
464-
}
465-
466-
if (nr_zones)
467-
ret = blk_update_zone_info(disk, nr_zones, &args);
453+
noio_flag = memalloc_noio_save();
454+
ret = disk->fops->report_zones(disk, 0, UINT_MAX,
455+
blk_revalidate_zone_cb, &args);
456+
memalloc_noio_restore(noio_flag);
468457

469458
/*
470-
* Install the new bitmaps, making sure the queue is stopped and
471-
* all I/Os are completed (i.e. a scheduler is not referencing the
472-
* bitmaps).
459+
* Install the new bitmaps and update nr_zones only once the queue is
460+
* stopped and all I/Os are completed (i.e. a scheduler is not
461+
* referencing the bitmaps).
473462
*/
474463
blk_mq_freeze_queue(q);
475464
if (ret >= 0) {
476-
q->nr_zones = nr_zones;
465+
blk_queue_chunk_sectors(q, args.zone_sectors);
466+
q->nr_zones = args.nr_zones;
477467
swap(q->seq_zones_wlock, args.seq_zones_wlock);
478-
swap(q->seq_zones_bitmap, args.seq_zones_bitmap);
468+
swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
479469
ret = 0;
480470
} else {
481471
pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
@@ -484,8 +474,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
484474
blk_mq_unfreeze_queue(q);
485475

486476
kfree(args.seq_zones_wlock);
487-
kfree(args.seq_zones_bitmap);
477+
kfree(args.conv_zones_bitmap);
488478
return ret;
489479
}
490480
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
491-

block/blk.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
121121
#ifdef CONFIG_BLK_DEV_INTEGRITY
122122
void blk_flush_integrity(void);
123123
bool __bio_integrity_endio(struct bio *);
124+
void bio_integrity_free(struct bio *bio);
124125
static inline bool bio_integrity_endio(struct bio *bio)
125126
{
126127
if (bio_integrity(bio))
@@ -166,6 +167,9 @@ static inline bool bio_integrity_endio(struct bio *bio)
166167
{
167168
return true;
168169
}
170+
static inline void bio_integrity_free(struct bio *bio)
171+
{
172+
}
169173
#endif /* CONFIG_BLK_DEV_INTEGRITY */
170174

171175
unsigned long blk_rq_timeout(unsigned long timeout);

block/ioctl.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
512512
case BLKGETZONESZ:
513513
return put_uint(arg, bdev_zone_sectors(bdev));
514514
case BLKGETNRZONES:
515-
return put_uint(arg, blkdev_nr_zones(bdev));
515+
return put_uint(arg, blkdev_nr_zones(bdev->bd_disk));
516516
case HDIO_GETGEO:
517517
return blkdev_getgeo(bdev, argp);
518518
case BLKRAGET:

drivers/block/brd.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,10 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
297297
unsigned int len = bvec.bv_len;
298298
int err;
299299

300+
/* Don't support un-aligned buffer */
301+
WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
302+
(len & (SECTOR_SIZE - 1)));
303+
300304
err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
301305
bio_op(bio), sector);
302306
if (err)
@@ -382,7 +386,6 @@ static struct brd_device *brd_alloc(int i)
382386
goto out_free_dev;
383387

384388
blk_queue_make_request(brd->brd_queue, brd_make_request);
385-
blk_queue_max_hw_sectors(brd->brd_queue, 1024);
386389

387390
/* This is so fdisk will align partitions on 4k, because of
388391
* direct_access API needing 4k alignment, returning a PFN

0 commit comments

Comments
 (0)