Skip to content

Commit 57bebf8

Browse files
isilence authored and axboe committed
io_uring/rsrc: optimise registered huge pages
When registering huge pages, internally io_uring will split them into many PAGE_SIZE bvec entries. That's bad for performance as drivers need to eventually dma-map the data and will do it individually for each bvec entry. Coalesce huge pages into one large bvec. Signed-off-by: Pavel Begunkov <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent b000ae0 commit 57bebf8

File tree

1 file changed

+32
-6
lines changed

1 file changed

+32
-6
lines changed

io_uring/rsrc.c

Lines changed: 32 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1210,6 +1210,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
12101210
unsigned long off;
12111211
size_t size;
12121212
int ret, nr_pages, i;
1213+
struct folio *folio;
12131214

12141215
*pimu = ctx->dummy_ubuf;
12151216
if (!iov->iov_base)
@@ -1224,6 +1225,21 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
12241225
goto done;
12251226
}
12261227

1228+
/* If it's a huge page, try to coalesce them into a single bvec entry */
1229+
if (nr_pages > 1) {
1230+
folio = page_folio(pages[0]);
1231+
for (i = 1; i < nr_pages; i++) {
1232+
if (page_folio(pages[i]) != folio) {
1233+
folio = NULL;
1234+
break;
1235+
}
1236+
}
1237+
if (folio) {
1238+
folio_put_refs(folio, nr_pages - 1);
1239+
nr_pages = 1;
1240+
}
1241+
}
1242+
12271243
imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
12281244
if (!imu)
12291245
goto done;
@@ -1236,6 +1252,17 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
12361252

12371253
off = (unsigned long) iov->iov_base & ~PAGE_MASK;
12381254
size = iov->iov_len;
1255+
/* store original address for later verification */
1256+
imu->ubuf = (unsigned long) iov->iov_base;
1257+
imu->ubuf_end = imu->ubuf + iov->iov_len;
1258+
imu->nr_bvecs = nr_pages;
1259+
*pimu = imu;
1260+
ret = 0;
1261+
1262+
if (folio) {
1263+
bvec_set_page(&imu->bvec[0], pages[0], size, off);
1264+
goto done;
1265+
}
12391266
for (i = 0; i < nr_pages; i++) {
12401267
size_t vec_len;
12411268

@@ -1244,12 +1271,6 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
12441271
off = 0;
12451272
size -= vec_len;
12461273
}
1247-
/* store original address for later verification */
1248-
imu->ubuf = (unsigned long) iov->iov_base;
1249-
imu->ubuf_end = imu->ubuf + iov->iov_len;
1250-
imu->nr_bvecs = nr_pages;
1251-
*pimu = imu;
1252-
ret = 0;
12531274
done:
12541275
if (ret)
12551276
kvfree(imu);
@@ -1364,6 +1385,11 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
13641385
const struct bio_vec *bvec = imu->bvec;
13651386

13661387
if (offset <= bvec->bv_len) {
1388+
/*
1389+
* Note, huge pages buffers consists of one large
1390+
* bvec entry and should always go this way. The other
1391+
* branch doesn't expect non PAGE_SIZE'd chunks.
1392+
*/
13671393
iter->bvec = bvec;
13681394
iter->nr_segs = bvec->bv_len;
13691395
iter->count -= offset;

0 commit comments

Comments
 (0)