Skip to content

Commit 7c989b1

Browse files
committed
Merge tag 'for-6.1/passthrough-2022-10-04' of git://git.kernel.dk/linux
Pull passthrough updates from Jens Axboe: "With these changes, passthrough NVMe support over io_uring now performs at the same level as block device O_DIRECT, and in many cases 6-8% better. This contains: - Add support for fixed buffers for passthrough (Anuj, Kanchan) - Enable batched allocations and freeing on passthrough, similarly to what we support on the normal storage path (me) - Fix from Geert fixing an issue with !CONFIG_IO_URING" * tag 'for-6.1/passthrough-2022-10-04' of git://git.kernel.dk/linux: io_uring: Add missing inline to io_uring_cmd_import_fixed() dummy nvme: wire up fixed buffer support for nvme passthrough nvme: pass ubuffer as an integer block: extend functionality to map bvec iterator block: factor out blk_rq_map_bio_alloc helper block: rename bio_map_put to blk_mq_map_bio_put nvme: refactor nvme_alloc_request nvme: refactor nvme_add_user_metadata nvme: Use blk_rq_map_user_io helper scsi: Use blk_rq_map_user_io helper block: add blk_rq_map_user_io io_uring: introduce fixed buffer support for io_uring_cmd io_uring: add io_uring_cmd_import_fixed nvme: enable batched completions of passthrough IO nvme: split out metadata vs non metadata end_io uring_cmd completions block: allow end_io based requests in the completion batch handling block: change request end_io handler to pass back a return value block: enable batched allocation for blk_mq_alloc_request() block: kill deprecated BUG_ON() in the flush handling
2 parents 5133898 + 0e0abad commit 7c989b1

File tree

18 files changed

+476
-181
lines changed

18 files changed

+476
-181
lines changed

block/blk-flush.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,6 @@ static void blk_flush_complete_seq(struct request *rq,
205205
* flush data request completion path. Restore @rq for
206206
* normal completion and end it.
207207
*/
208-
BUG_ON(!list_empty(&rq->queuelist));
209208
list_del_init(&rq->flush.list);
210209
blk_flush_restore_request(rq);
211210
blk_mq_end_request(rq, error);
@@ -218,7 +217,8 @@ static void blk_flush_complete_seq(struct request *rq,
218217
blk_kick_flush(q, fq, cmd_flags);
219218
}
220219

221-
static void flush_end_io(struct request *flush_rq, blk_status_t error)
220+
static enum rq_end_io_ret flush_end_io(struct request *flush_rq,
221+
blk_status_t error)
222222
{
223223
struct request_queue *q = flush_rq->q;
224224
struct list_head *running;
@@ -232,7 +232,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
232232
if (!req_ref_put_and_test(flush_rq)) {
233233
fq->rq_status = error;
234234
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
235-
return;
235+
return RQ_END_IO_NONE;
236236
}
237237

238238
blk_account_io_flush(flush_rq);
@@ -269,6 +269,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
269269
}
270270

271271
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
272+
return RQ_END_IO_NONE;
272273
}
273274

274275
bool is_flush_rq(struct request *rq)
@@ -354,7 +355,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
354355
blk_flush_queue_rq(flush_rq, false);
355356
}
356357

357-
static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
358+
static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
359+
blk_status_t error)
358360
{
359361
struct request_queue *q = rq->q;
360362
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
@@ -376,6 +378,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
376378
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
377379

378380
blk_mq_sched_restart(hctx);
381+
return RQ_END_IO_NONE;
379382
}
380383

381384
/**

block/blk-map.c

Lines changed: 132 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
231231
return ret;
232232
}
233233

234-
static void bio_map_put(struct bio *bio)
234+
static void blk_mq_map_bio_put(struct bio *bio)
235235
{
236236
if (bio->bi_opf & REQ_ALLOC_CACHE) {
237237
bio_put(bio);
@@ -241,31 +241,42 @@ static void bio_map_put(struct bio *bio)
241241
}
242242
}
243243

244-
static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
245-
gfp_t gfp_mask)
244+
static struct bio *blk_rq_map_bio_alloc(struct request *rq,
245+
unsigned int nr_vecs, gfp_t gfp_mask)
246246
{
247-
unsigned int max_sectors = queue_max_hw_sectors(rq->q);
248-
unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
249247
struct bio *bio;
250-
int ret;
251-
int j;
252-
253-
if (!iov_iter_count(iter))
254-
return -EINVAL;
255248

256249
if (rq->cmd_flags & REQ_POLLED) {
257250
blk_opf_t opf = rq->cmd_flags | REQ_ALLOC_CACHE;
258251

259252
bio = bio_alloc_bioset(NULL, nr_vecs, opf, gfp_mask,
260253
&fs_bio_set);
261254
if (!bio)
262-
return -ENOMEM;
255+
return NULL;
263256
} else {
264257
bio = bio_kmalloc(nr_vecs, gfp_mask);
265258
if (!bio)
266-
return -ENOMEM;
259+
return NULL;
267260
bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
268261
}
262+
return bio;
263+
}
264+
265+
static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
266+
gfp_t gfp_mask)
267+
{
268+
unsigned int max_sectors = queue_max_hw_sectors(rq->q);
269+
unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
270+
struct bio *bio;
271+
int ret;
272+
int j;
273+
274+
if (!iov_iter_count(iter))
275+
return -EINVAL;
276+
277+
bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
278+
if (bio == NULL)
279+
return -ENOMEM;
269280

270281
while (iov_iter_count(iter)) {
271282
struct page **pages, *stack_pages[UIO_FASTIOV];
@@ -331,7 +342,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
331342

332343
out_unmap:
333344
bio_release_pages(bio, false);
334-
bio_map_put(bio);
345+
blk_mq_map_bio_put(bio);
335346
return ret;
336347
}
337348

@@ -537,6 +548,62 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio)
537548
}
538549
EXPORT_SYMBOL(blk_rq_append_bio);
539550

551+
/* Prepare bio for passthrough IO given ITER_BVEC iter */
552+
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
553+
{
554+
struct request_queue *q = rq->q;
555+
size_t nr_iter = iov_iter_count(iter);
556+
size_t nr_segs = iter->nr_segs;
557+
struct bio_vec *bvecs, *bvprvp = NULL;
558+
struct queue_limits *lim = &q->limits;
559+
unsigned int nsegs = 0, bytes = 0;
560+
struct bio *bio;
561+
size_t i;
562+
563+
if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
564+
return -EINVAL;
565+
if (nr_segs > queue_max_segments(q))
566+
return -EINVAL;
567+
568+
/* no iovecs to alloc, as we already have a BVEC iterator */
569+
bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
570+
if (bio == NULL)
571+
return -ENOMEM;
572+
573+
bio_iov_bvec_set(bio, (struct iov_iter *)iter);
574+
blk_rq_bio_prep(rq, bio, nr_segs);
575+
576+
/* loop to perform a bunch of sanity checks */
577+
bvecs = (struct bio_vec *)iter->bvec;
578+
for (i = 0; i < nr_segs; i++) {
579+
struct bio_vec *bv = &bvecs[i];
580+
581+
/*
582+
* If the queue doesn't support SG gaps and adding this
583+
* offset would create a gap, fallback to copy.
584+
*/
585+
if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
586+
blk_mq_map_bio_put(bio);
587+
return -EREMOTEIO;
588+
}
589+
/* check full condition */
590+
if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
591+
goto put_bio;
592+
if (bytes + bv->bv_len > nr_iter)
593+
goto put_bio;
594+
if (bv->bv_offset + bv->bv_len > PAGE_SIZE)
595+
goto put_bio;
596+
597+
nsegs++;
598+
bytes += bv->bv_len;
599+
bvprvp = bv;
600+
}
601+
return 0;
602+
put_bio:
603+
blk_mq_map_bio_put(bio);
604+
return -EINVAL;
605+
}
606+
540607
/**
541608
* blk_rq_map_user_iov - map user data to a request, for passthrough requests
542609
* @q: request queue where request should be inserted
@@ -556,24 +623,35 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
556623
struct rq_map_data *map_data,
557624
const struct iov_iter *iter, gfp_t gfp_mask)
558625
{
559-
bool copy = false;
626+
bool copy = false, map_bvec = false;
560627
unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
561628
struct bio *bio = NULL;
562629
struct iov_iter i;
563630
int ret = -EINVAL;
564631

565-
if (!iter_is_iovec(iter))
566-
goto fail;
567-
568632
if (map_data)
569633
copy = true;
570634
else if (blk_queue_may_bounce(q))
571635
copy = true;
572636
else if (iov_iter_alignment(iter) & align)
573637
copy = true;
638+
else if (iov_iter_is_bvec(iter))
639+
map_bvec = true;
640+
else if (!iter_is_iovec(iter))
641+
copy = true;
574642
else if (queue_virt_boundary(q))
575643
copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);
576644

645+
if (map_bvec) {
646+
ret = blk_rq_map_user_bvec(rq, iter);
647+
if (!ret)
648+
return 0;
649+
if (ret != -EREMOTEIO)
650+
goto fail;
651+
/* fall back to copying the data on limits mismatches */
652+
copy = true;
653+
}
654+
577655
i = *iter;
578656
do {
579657
if (copy)
@@ -611,6 +689,42 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
611689
}
612690
EXPORT_SYMBOL(blk_rq_map_user);
613691

692+
int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data,
693+
void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask,
694+
bool vec, int iov_count, bool check_iter_count, int rw)
695+
{
696+
int ret = 0;
697+
698+
if (vec) {
699+
struct iovec fast_iov[UIO_FASTIOV];
700+
struct iovec *iov = fast_iov;
701+
struct iov_iter iter;
702+
703+
ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len,
704+
UIO_FASTIOV, &iov, &iter);
705+
if (ret < 0)
706+
return ret;
707+
708+
if (iov_count) {
709+
/* SG_IO howto says that the shorter of the two wins */
710+
iov_iter_truncate(&iter, buf_len);
711+
if (check_iter_count && !iov_iter_count(&iter)) {
712+
kfree(iov);
713+
return -EINVAL;
714+
}
715+
}
716+
717+
ret = blk_rq_map_user_iov(req->q, req, map_data, &iter,
718+
gfp_mask);
719+
kfree(iov);
720+
} else if (buf_len) {
721+
ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len,
722+
gfp_mask);
723+
}
724+
return ret;
725+
}
726+
EXPORT_SYMBOL(blk_rq_map_user_io);
727+
614728
/**
615729
* blk_rq_unmap_user - unmap a request with user data
616730
* @bio: start of bio list
@@ -636,7 +750,7 @@ int blk_rq_unmap_user(struct bio *bio)
636750

637751
next_bio = bio;
638752
bio = bio->bi_next;
639-
bio_map_put(next_bio);
753+
blk_mq_map_bio_put(next_bio);
640754
}
641755

642756
return ret;

0 commit comments

Comments (0)