Skip to content

Commit 6aeb4f8

Browse files
Christoph Hellwig authored and axboe (Jens Axboe) committed
block: remove bio_add_pc_page
Lift bio_split_rw_at into blk_rq_append_bio so that it validates the hardware limits. With this all passthrough callers can simply use bio_add_page to build the bio and delay checking for exceeding of limits to this point instead of doing it for each page. While this looks like adding a new expensive loop over all bio_vecs, blk_rq_append_bio is already doing that just to count the number of segments. Signed-off-by: Christoph Hellwig <[email protected]> Reviewed-by: Sagi Grimberg <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jens Axboe <[email protected]>
1 parent c2398e6 commit 6aeb4f8

File tree

7 files changed

+48
-214
lines changed

7 files changed

+48
-214
lines changed

block/bio.c

Lines changed: 5 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -946,8 +946,11 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
946946

947947
/*
948948
* Try to merge a page into a segment, while obeying the hardware segment
949-
* size limit. This is not for normal read/write bios, but for passthrough
950-
* or Zone Append operations that we can't split.
949+
* size limit.
950+
*
951+
* This is kept around for the integrity metadata, which is still tries
952+
* to build the initial bio to the hardware limit and doesn't have proper
953+
* helpers to split. Hopefully this will go away soon.
951954
*/
952955
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
953956
struct page *page, unsigned len, unsigned offset,
@@ -964,106 +967,6 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
964967
return bvec_try_merge_page(bv, page, len, offset, same_page);
965968
}
966969

967-
/**
968-
* bio_add_hw_page - attempt to add a page to a bio with hw constraints
969-
* @q: the target queue
970-
* @bio: destination bio
971-
* @page: page to add
972-
* @len: vec entry length
973-
* @offset: vec entry offset
974-
* @max_sectors: maximum number of sectors that can be added
975-
* @same_page: return if the segment has been merged inside the same page
976-
*
977-
* Add a page to a bio while respecting the hardware max_sectors, max_segment
978-
* and gap limitations.
979-
*/
980-
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
981-
struct page *page, unsigned int len, unsigned int offset,
982-
unsigned int max_sectors, bool *same_page)
983-
{
984-
unsigned int max_size = max_sectors << SECTOR_SHIFT;
985-
986-
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
987-
return 0;
988-
989-
len = min3(len, max_size, queue_max_segment_size(q));
990-
if (len > max_size - bio->bi_iter.bi_size)
991-
return 0;
992-
993-
if (bio->bi_vcnt > 0) {
994-
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
995-
996-
if (bvec_try_merge_hw_page(q, bv, page, len, offset,
997-
same_page)) {
998-
bio->bi_iter.bi_size += len;
999-
return len;
1000-
}
1001-
1002-
if (bio->bi_vcnt >=
1003-
min(bio->bi_max_vecs, queue_max_segments(q)))
1004-
return 0;
1005-
1006-
/*
1007-
* If the queue doesn't support SG gaps and adding this segment
1008-
* would create a gap, disallow it.
1009-
*/
1010-
if (bvec_gap_to_prev(&q->limits, bv, offset))
1011-
return 0;
1012-
}
1013-
1014-
bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset);
1015-
bio->bi_vcnt++;
1016-
bio->bi_iter.bi_size += len;
1017-
return len;
1018-
}
1019-
1020-
/**
1021-
* bio_add_hw_folio - attempt to add a folio to a bio with hw constraints
1022-
* @q: the target queue
1023-
* @bio: destination bio
1024-
* @folio: folio to add
1025-
* @len: vec entry length
1026-
* @offset: vec entry offset in the folio
1027-
* @max_sectors: maximum number of sectors that can be added
1028-
* @same_page: return if the segment has been merged inside the same folio
1029-
*
1030-
* Add a folio to a bio while respecting the hardware max_sectors, max_segment
1031-
* and gap limitations.
1032-
*/
1033-
int bio_add_hw_folio(struct request_queue *q, struct bio *bio,
1034-
struct folio *folio, size_t len, size_t offset,
1035-
unsigned int max_sectors, bool *same_page)
1036-
{
1037-
if (len > UINT_MAX || offset > UINT_MAX)
1038-
return 0;
1039-
return bio_add_hw_page(q, bio, folio_page(folio, 0), len, offset,
1040-
max_sectors, same_page);
1041-
}
1042-
1043-
/**
1044-
* bio_add_pc_page - attempt to add page to passthrough bio
1045-
* @q: the target queue
1046-
* @bio: destination bio
1047-
* @page: page to add
1048-
* @len: vec entry length
1049-
* @offset: vec entry offset
1050-
*
1051-
* Attempt to add a page to the bio_vec maplist. This can fail for a
1052-
* number of reasons, such as the bio being full or target block device
1053-
* limitations. The target block device must allow bio's up to PAGE_SIZE,
1054-
* so it is always possible to add a single page to an empty bio.
1055-
*
1056-
* This should only be used by passthrough bios.
1057-
*/
1058-
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
1059-
struct page *page, unsigned int len, unsigned int offset)
1060-
{
1061-
bool same_page = false;
1062-
return bio_add_hw_page(q, bio, page, len, offset,
1063-
queue_max_hw_sectors(q), &same_page);
1064-
}
1065-
EXPORT_SYMBOL(bio_add_pc_page);
1066-
1067970
/**
1068971
* __bio_add_page - add page(s) to a bio in a new segment
1069972
* @bio: destination bio

block/blk-map.c

Lines changed: 29 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
189189
}
190190
}
191191

192-
if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
192+
if (bio_add_page(bio, page, bytes, offset) < bytes) {
193193
if (!map_data)
194194
__free_page(page);
195195
break;
@@ -272,86 +272,27 @@ static struct bio *blk_rq_map_bio_alloc(struct request *rq,
272272
static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
273273
gfp_t gfp_mask)
274274
{
275-
iov_iter_extraction_t extraction_flags = 0;
276-
unsigned int max_sectors = queue_max_hw_sectors(rq->q);
277275
unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
278276
struct bio *bio;
279277
int ret;
280-
int j;
281278

282279
if (!iov_iter_count(iter))
283280
return -EINVAL;
284281

285282
bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
286-
if (bio == NULL)
283+
if (!bio)
287284
return -ENOMEM;
288-
289-
if (blk_queue_pci_p2pdma(rq->q))
290-
extraction_flags |= ITER_ALLOW_P2PDMA;
291-
if (iov_iter_extract_will_pin(iter))
292-
bio_set_flag(bio, BIO_PAGE_PINNED);
293-
294-
while (iov_iter_count(iter)) {
295-
struct page *stack_pages[UIO_FASTIOV];
296-
struct page **pages = stack_pages;
297-
ssize_t bytes;
298-
size_t offs;
299-
int npages;
300-
301-
if (nr_vecs > ARRAY_SIZE(stack_pages))
302-
pages = NULL;
303-
304-
bytes = iov_iter_extract_pages(iter, &pages, LONG_MAX,
305-
nr_vecs, extraction_flags, &offs);
306-
if (unlikely(bytes <= 0)) {
307-
ret = bytes ? bytes : -EFAULT;
308-
goto out_unmap;
309-
}
310-
311-
npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
312-
313-
if (unlikely(offs & queue_dma_alignment(rq->q)))
314-
j = 0;
315-
else {
316-
for (j = 0; j < npages; j++) {
317-
struct page *page = pages[j];
318-
unsigned int n = PAGE_SIZE - offs;
319-
bool same_page = false;
320-
321-
if (n > bytes)
322-
n = bytes;
323-
324-
if (!bio_add_hw_page(rq->q, bio, page, n, offs,
325-
max_sectors, &same_page))
326-
break;
327-
328-
if (same_page)
329-
bio_release_page(bio, page);
330-
bytes -= n;
331-
offs = 0;
332-
}
333-
}
334-
/*
335-
* release the pages we didn't map into the bio, if any
336-
*/
337-
while (j < npages)
338-
bio_release_page(bio, pages[j++]);
339-
if (pages != stack_pages)
340-
kvfree(pages);
341-
/* couldn't stuff something into bio? */
342-
if (bytes) {
343-
iov_iter_revert(iter, bytes);
344-
break;
345-
}
346-
}
347-
285+
ret = bio_iov_iter_get_pages(bio, iter);
286+
if (ret)
287+
goto out_put;
348288
ret = blk_rq_append_bio(rq, bio);
349289
if (ret)
350-
goto out_unmap;
290+
goto out_release;
351291
return 0;
352292

353-
out_unmap:
293+
out_release:
354294
bio_release_pages(bio, false);
295+
out_put:
355296
blk_mq_map_bio_put(bio);
356297
return ret;
357298
}
@@ -422,8 +363,7 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
422363
page = virt_to_page(data);
423364
else
424365
page = vmalloc_to_page(data);
425-
if (bio_add_pc_page(q, bio, page, bytes,
426-
offset) < bytes) {
366+
if (bio_add_page(bio, page, bytes, offset) < bytes) {
427367
/* we don't support partial mappings */
428368
bio_uninit(bio);
429369
kfree(bio);
@@ -507,7 +447,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
507447
if (!reading)
508448
memcpy(page_address(page), p, bytes);
509449

510-
if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
450+
if (bio_add_page(bio, page, bytes, 0) < bytes)
511451
break;
512452

513453
len -= bytes;
@@ -536,12 +476,19 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
536476
*/
537477
int blk_rq_append_bio(struct request *rq, struct bio *bio)
538478
{
539-
struct bvec_iter iter;
540-
struct bio_vec bv;
479+
const struct queue_limits *lim = &rq->q->limits;
480+
unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
541481
unsigned int nr_segs = 0;
482+
int ret;
542483

543-
bio_for_each_bvec(bv, bio, iter)
544-
nr_segs++;
484+
/* check that the data layout matches the hardware restrictions */
485+
ret = bio_split_rw_at(bio, lim, &nr_segs, max_bytes);
486+
if (ret) {
487+
/* if we would have to split the bio, copy instead */
488+
if (ret > 0)
489+
ret = -EREMOTEIO;
490+
return ret;
491+
}
545492

546493
if (!rq->bio) {
547494
blk_rq_bio_prep(rq, bio, nr_segs);
@@ -561,9 +508,7 @@ EXPORT_SYMBOL(blk_rq_append_bio);
561508
/* Prepare bio for passthrough IO given ITER_BVEC iter */
562509
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
563510
{
564-
const struct queue_limits *lim = &rq->q->limits;
565-
unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
566-
unsigned int nsegs;
511+
unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT;
567512
struct bio *bio;
568513
int ret;
569514

@@ -576,18 +521,10 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
576521
return -ENOMEM;
577522
bio_iov_bvec_set(bio, iter);
578523

579-
/* check that the data layout matches the hardware restrictions */
580-
ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
581-
if (ret) {
582-
/* if we would have to split the bio, copy instead */
583-
if (ret > 0)
584-
ret = -EREMOTEIO;
524+
ret = blk_rq_append_bio(rq, bio);
525+
if (ret)
585526
blk_mq_map_bio_put(bio);
586-
return ret;
587-
}
588-
589-
blk_rq_bio_prep(rq, bio, nsegs);
590-
return 0;
527+
return ret;
591528
}
592529

593530
/**
@@ -644,8 +581,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
644581
ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
645582
else
646583
ret = bio_map_user_iov(rq, &i, gfp_mask);
647-
if (ret)
584+
if (ret) {
585+
if (ret == -EREMOTEIO)
586+
ret = -EINVAL;
648587
goto unmap_rq;
588+
}
649589
if (!bio)
650590
bio = rq->bio;
651591
} while (iov_iter_count(&i));

block/blk.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -556,14 +556,6 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors);
556556
struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
557557
struct lock_class_key *lkclass);
558558

559-
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
560-
struct page *page, unsigned int len, unsigned int offset,
561-
unsigned int max_sectors, bool *same_page);
562-
563-
int bio_add_hw_folio(struct request_queue *q, struct bio *bio,
564-
struct folio *folio, size_t len, size_t offset,
565-
unsigned int max_sectors, bool *same_page);
566-
567559
/*
568560
* Clean up a page appropriately, where the page may be pinned, may have a
569561
* ref taken on it or neither.

drivers/nvme/target/passthru.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
261261
{
262262
struct scatterlist *sg;
263263
struct bio *bio;
264+
int ret = -EINVAL;
264265
int i;
265266

266267
if (req->sg_cnt > BIO_MAX_VECS)
@@ -277,16 +278,19 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
277278
}
278279

279280
for_each_sg(req->sg, sg, req->sg_cnt, i) {
280-
if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
281-
sg->offset) < sg->length) {
282-
nvmet_req_bio_put(req, bio);
283-
return -EINVAL;
284-
}
281+
if (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) <
282+
sg->length)
283+
goto out_bio_put;
285284
}
286285

287-
blk_rq_bio_prep(rq, bio, req->sg_cnt);
288-
286+
ret = blk_rq_append_bio(rq, bio);
287+
if (ret)
288+
goto out_bio_put;
289289
return 0;
290+
291+
out_bio_put:
292+
nvmet_req_bio_put(req, bio);
293+
return ret;
290294
}
291295

292296
static void nvmet_passthru_execute_cmd(struct nvmet_req *req)

drivers/nvme/target/zns.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -586,8 +586,7 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
586586
for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) {
587587
unsigned int len = sg->length;
588588

589-
if (bio_add_pc_page(bdev_get_queue(bio->bi_bdev), bio,
590-
sg_page(sg), len, sg->offset) != len) {
589+
if (bio_add_page(bio, sg_page(sg), len, sg->offset) != len) {
591590
status = NVME_SC_INTERNAL;
592591
goto out_put_bio;
593592
}

drivers/target/target_core_pscsi.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -823,7 +823,6 @@ static sense_reason_t
823823
pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
824824
struct request *req)
825825
{
826-
struct pscsi_dev_virt *pdv = PSCSI_DEV(cmd->se_dev);
827826
struct bio *bio = NULL;
828827
struct page *page;
829828
struct scatterlist *sg;
@@ -871,12 +870,11 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
871870
(rw) ? "rw" : "r", nr_vecs);
872871
}
873872

874-
pr_debug("PSCSI: Calling bio_add_pc_page() i: %d"
873+
pr_debug("PSCSI: Calling bio_add_page() i: %d"
875874
" bio: %p page: %p len: %d off: %d\n", i, bio,
876875
page, len, off);
877876

878-
rc = bio_add_pc_page(pdv->pdv_sd->request_queue,
879-
bio, page, bytes, off);
877+
rc = bio_add_page(bio, page, bytes, off);
880878
pr_debug("PSCSI: bio->bi_vcnt: %d nr_vecs: %d\n",
881879
bio_segments(bio), nr_vecs);
882880
if (rc != bytes) {

0 commit comments

Comments (0)