Skip to content

Commit cd2c754

Browse files
Changheun authored and axboe committed
bio: limit bio max size
The bio size can grow up to 4GB when multi-page bvec is enabled, but sometimes this leads to inefficient behavior. In the case of large-chunk direct I/O (for example, a 32MB chunk read from user space), all pages for the 32MB would be merged into a single bio structure if the pages' physical addresses are contiguous. This delays submission until the merge completes. The bio max size should be limited to a proper size.

When a 32MB chunk read with the direct I/O option comes from userspace, the current kernel behavior in the do_direct_IO() loop is shown in the timeline below.

 | bio merge for 32MB. total 8,192 pages are merged.
 | total elapsed time is over 2ms.
 |------------------ ... ----------------------->|
                                                 | 8,192 pages merged into a bio.
                                                 | at this time, first bio submit is done.
                                                 | 1 bio is split into 32 read requests and issued.
                                                 |--------------->
                                                  |--------------->
                                                   |--------------->
                                                              ......
                                                                   |--------------->
                                                                    |--------------->|
                          total 19ms elapsed to complete 32MB read done from device. |

If the bio max size is limited to 1MB, the behavior changes as shown below.

 | bio merge for 1MB. 256 pages are merged for each bio.
 | total 32 bios will be made.
 | total elapsed time is over 2ms. it's the same.
 | but, first bio submit timing is fast. about 100us.
 |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
      | 256 pages merged into a bio.
      | at this time, first bio submit is done.
      | and 1 read request is issued for 1 bio.
      |--------------->
           |--------------->
                |--------------->
                                      ......
                                                 |--------------->
                                                  |--------------->|
        total 17ms elapsed to complete 32MB read done from device. |

As a result, read requests are issued sooner if the bio max size is limited. With the current kernel behavior and multipage bvec, a super large bio can be created, and that delays the issue of the first I/O request.

Signed-off-by: Changheun Lee <[email protected]>
Reviewed-by: Bart Van Assche <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
1 parent c646790 commit cd2c754

File tree

4 files changed

+21
-3
lines changed

4 files changed

+21
-3
lines changed

block/bio.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,13 @@ void bio_init(struct bio *bio, struct bio_vec *table,
255255
}
256256
EXPORT_SYMBOL(bio_init);
257257

258+
/*
 * bio_max_size - return the size limit that applies to @bio.
 * @bio: bio whose limit is queried; its bi_bdev field may be NULL.
 *
 * When @bio has a block device attached, the result is the bio_max_bytes
 * value stored in the limits of that device's disk queue.  When bi_bdev is
 * NULL, the result is UINT_MAX.
 *
 * Only bi_bdev is NULL-checked here; bd_disk and queue are dereferenced
 * unconditionally.
 */
unsigned int bio_max_size(struct bio *bio)
259+
{
260+
struct block_device *bdev = bio->bi_bdev;
261+
262+
return bdev ? bdev->bd_disk->queue->limits.bio_max_bytes : UINT_MAX;
263+
}
264+
258265
/**
259266
* bio_reset - reinitialize a bio
260267
* @bio: bio to reset
@@ -866,7 +873,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
866873
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
867874

868875
if (page_is_mergeable(bv, page, len, off, same_page)) {
869-
if (bio->bi_iter.bi_size > UINT_MAX - len) {
876+
if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
870877
*same_page = false;
871878
return false;
872879
}
@@ -995,6 +1002,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
9951002
{
9961003
unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
9971004
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
1005+
unsigned int bytes_left = bio_max_size(bio) - bio->bi_iter.bi_size;
9981006
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
9991007
struct page **pages = (struct page **)bv;
10001008
bool same_page = false;
@@ -1010,7 +1018,8 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
10101018
BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
10111019
pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
10121020

1013-
size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
1021+
size = iov_iter_get_pages(iter, pages, bytes_left, nr_pages,
1022+
&offset);
10141023
if (unlikely(size <= 0))
10151024
return size ? size : -EFAULT;
10161025

block/blk-settings.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
3131
*/
3232
void blk_set_default_limits(struct queue_limits *lim)
3333
{
34+
lim->bio_max_bytes = UINT_MAX;
3435
lim->max_segments = BLK_MAX_SEGMENTS;
3536
lim->max_discard_segments = 1;
3637
lim->max_integrity_segments = 0;
@@ -139,6 +140,10 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
139140
limits->logical_block_size >> SECTOR_SHIFT);
140141
limits->max_sectors = max_sectors;
141142

143+
if (check_shl_overflow(max_sectors, SECTOR_SHIFT,
144+
&limits->bio_max_bytes))
145+
limits->bio_max_bytes = UINT_MAX;
146+
142147
q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
143148
}
144149
EXPORT_SYMBOL(blk_queue_max_hw_sectors);

include/linux/bio.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ static inline void *bio_data(struct bio *bio)
106106
return NULL;
107107
}
108108

109+
extern unsigned int bio_max_size(struct bio *bio);
110+
109111
/**
110112
* bio_full - check if the bio is full
111113
* @bio: bio to check
@@ -119,7 +121,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
119121
if (bio->bi_vcnt >= bio->bi_max_vecs)
120122
return true;
121123

122-
if (bio->bi_iter.bi_size > UINT_MAX - len)
124+
if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
123125
return true;
124126

125127
return false;

include/linux/blkdev.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,8 @@ enum blk_bounce {
327327
};
328328

329329
struct queue_limits {
330+
unsigned int bio_max_bytes;
331+
330332
enum blk_bounce bounce;
331333
unsigned long seg_boundary_mask;
332334
unsigned long virt_boundary_mask;

0 commit comments

Comments
 (0)