Skip to content

Commit 0a47e02

Browse files
committed
Merge tag 'io_uring-6.16-20250626' of git://git.kernel.dk/linux
Pull io_uring fixes from Jens Axboe: - Two tweaks for a recent fix: fixing a memory leak if multiple iovecs were initially mapped but only the first was used and hence turned into a UBUF rather than an IOVEC iterator, and catching a case where a retry would be done even if the previous segment wasn't full - Small series fixing an issue making the vm unhappy if debugging is turned on, hitting a VM_BUG_ON_PAGE() - Fix a resource leak in io_import_dmabuf() in the error handling case, which is a regression in this merge window - Mark fallocate as needing to be write serialized, as is already done for truncate and buffered writes * tag 'io_uring-6.16-20250626' of git://git.kernel.dk/linux: io_uring/kbuf: flag partial buffer mappings io_uring/net: mark iov as dynamically allocated even for single segments io_uring: fix resource leak in io_import_dmabuf() io_uring: don't assume uaddr alignment in io_vec_fill_bvec io_uring/rsrc: don't rely on user vaddr alignment io_uring/rsrc: fix folio unpinning io_uring: make fallocate be hashed work
2 parents 9c7331f + 178b8ff commit 0a47e02

File tree

7 files changed

+52
-24
lines changed

7 files changed

+52
-24
lines changed

io_uring/kbuf.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
271271
if (len > arg->max_len) {
272272
len = arg->max_len;
273273
if (!(bl->flags & IOBL_INC)) {
274+
arg->partial_map = 1;
274275
if (iov != arg->iovs)
275276
break;
276277
buf->len = len;

io_uring/kbuf.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ struct buf_sel_arg {
5858
size_t max_len;
5959
unsigned short nr_iovs;
6060
unsigned short mode;
61-
unsigned buf_group;
61+
unsigned short buf_group;
62+
unsigned short partial_map;
6263
};
6364

6465
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,

io_uring/net.c

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,17 @@ struct io_sr_msg {
7575
u16 flags;
7676
/* initialised and used only by !msg send variants */
7777
u16 buf_group;
78-
bool retry;
78+
unsigned short retry_flags;
7979
void __user *msg_control;
8080
/* used only for send zerocopy */
8181
struct io_kiocb *notif;
8282
};
8383

84+
enum sr_retry_flags {
85+
IO_SR_MSG_RETRY = 1,
86+
IO_SR_MSG_PARTIAL_MAP = 2,
87+
};
88+
8489
/*
8590
* Number of times we'll try and do receives if there's more data. If we
8691
* exceed this limit, then add us to the back of the queue and retry from
@@ -187,7 +192,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
187192

188193
req->flags &= ~REQ_F_BL_EMPTY;
189194
sr->done_io = 0;
190-
sr->retry = false;
195+
sr->retry_flags = 0;
191196
sr->len = 0; /* get from the provided buffer */
192197
}
193198

@@ -397,7 +402,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
397402
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
398403

399404
sr->done_io = 0;
400-
sr->retry = false;
405+
sr->retry_flags = 0;
401406
sr->len = READ_ONCE(sqe->len);
402407
sr->flags = READ_ONCE(sqe->ioprio);
403408
if (sr->flags & ~SENDMSG_FLAGS)
@@ -751,7 +756,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
751756
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
752757

753758
sr->done_io = 0;
754-
sr->retry = false;
759+
sr->retry_flags = 0;
755760

756761
if (unlikely(sqe->file_index || sqe->addr2))
757762
return -EINVAL;
@@ -823,7 +828,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
823828

824829
cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
825830
issue_flags);
826-
if (sr->retry)
831+
if (sr->retry_flags & IO_SR_MSG_RETRY)
827832
cflags = req->cqe.flags | (cflags & CQE_F_MASK);
828833
/* bundle with no more immediate buffers, we're done */
829834
if (req->flags & REQ_F_BL_EMPTY)
@@ -832,12 +837,12 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
832837
* If more is available AND it was a full transfer, retry and
833838
* append to this one
834839
*/
835-
if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
840+
if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
836841
!iov_iter_count(&kmsg->msg.msg_iter)) {
837842
req->cqe.flags = cflags & ~CQE_F_MASK;
838843
sr->len = kmsg->msg.msg_inq;
839844
sr->done_io += this_ret;
840-
sr->retry = true;
845+
sr->retry_flags |= IO_SR_MSG_RETRY;
841846
return false;
842847
}
843848
} else {
@@ -1077,6 +1082,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
10771082
if (unlikely(ret < 0))
10781083
return ret;
10791084

1085+
if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
1086+
kmsg->vec.nr = ret;
1087+
kmsg->vec.iovec = arg.iovs;
1088+
req->flags |= REQ_F_NEED_CLEANUP;
1089+
}
1090+
if (arg.partial_map)
1091+
sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;
1092+
10801093
/* special case 1 vec, can be a fast path */
10811094
if (ret == 1) {
10821095
sr->buf = arg.iovs[0].iov_base;
@@ -1085,11 +1098,6 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
10851098
}
10861099
iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
10871100
arg.out_len);
1088-
if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
1089-
kmsg->vec.nr = ret;
1090-
kmsg->vec.iovec = arg.iovs;
1091-
req->flags |= REQ_F_NEED_CLEANUP;
1092-
}
10931101
} else {
10941102
void __user *buf;
10951103

@@ -1275,7 +1283,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
12751283
int ret;
12761284

12771285
zc->done_io = 0;
1278-
zc->retry = false;
1286+
zc->retry_flags = 0;
12791287

12801288
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
12811289
return -EINVAL;

io_uring/opdef.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ const struct io_issue_def io_issue_defs[] = {
216216
},
217217
[IORING_OP_FALLOCATE] = {
218218
.needs_file = 1,
219+
.hash_reg_file = 1,
219220
.prep = io_fallocate_prep,
220221
.issue = io_fallocate,
221222
},

io_uring/rsrc.c

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,11 @@ static void io_release_ubuf(void *priv)
112112
struct io_mapped_ubuf *imu = priv;
113113
unsigned int i;
114114

115-
for (i = 0; i < imu->nr_bvecs; i++)
116-
unpin_user_page(imu->bvec[i].bv_page);
115+
for (i = 0; i < imu->nr_bvecs; i++) {
116+
struct folio *folio = page_folio(imu->bvec[i].bv_page);
117+
118+
unpin_user_folio(folio, 1);
119+
}
117120
}
118121

119122
static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx,
@@ -731,6 +734,7 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
731734

732735
data->nr_pages_mid = folio_nr_pages(folio);
733736
data->folio_shift = folio_shift(folio);
737+
data->first_folio_page_idx = folio_page_idx(folio, page_array[0]);
734738

735739
/*
736740
* Check if pages are contiguous inside a folio, and all folios have
@@ -824,7 +828,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
824828
if (coalesced)
825829
imu->folio_shift = data.folio_shift;
826830
refcount_set(&imu->refs, 1);
827-
off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1);
831+
832+
off = (unsigned long)iov->iov_base & ~PAGE_MASK;
833+
if (coalesced)
834+
off += data.first_folio_page_idx << PAGE_SHIFT;
835+
828836
node->buf = imu;
829837
ret = 0;
830838

@@ -840,8 +848,10 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
840848
if (ret) {
841849
if (imu)
842850
io_free_imu(ctx, imu);
843-
if (pages)
844-
unpin_user_pages(pages, nr_pages);
851+
if (pages) {
852+
for (i = 0; i < nr_pages; i++)
853+
unpin_user_folio(page_folio(pages[i]), 1);
854+
}
845855
io_cache_free(&ctx->node_cache, node);
846856
node = ERR_PTR(ret);
847857
}
@@ -1329,7 +1339,6 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
13291339
{
13301340
unsigned long folio_size = 1 << imu->folio_shift;
13311341
unsigned long folio_mask = folio_size - 1;
1332-
u64 folio_addr = imu->ubuf & ~folio_mask;
13331342
struct bio_vec *res_bvec = vec->bvec;
13341343
size_t total_len = 0;
13351344
unsigned bvec_idx = 0;
@@ -1351,8 +1360,13 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
13511360
if (unlikely(check_add_overflow(total_len, iov_len, &total_len)))
13521361
return -EOVERFLOW;
13531362

1354-
/* by using folio address it also accounts for bvec offset */
1355-
offset = buf_addr - folio_addr;
1363+
offset = buf_addr - imu->ubuf;
1364+
/*
1365+
* Only the first bvec can have non zero bv_offset, account it
1366+
* here and work with full folios below.
1367+
*/
1368+
offset += imu->bvec[0].bv_offset;
1369+
13561370
src_bvec = imu->bvec + (offset >> imu->folio_shift);
13571371
offset &= folio_mask;
13581372

io_uring/rsrc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ struct io_imu_folio_data {
4949
unsigned int nr_pages_mid;
5050
unsigned int folio_shift;
5151
unsigned int nr_folios;
52+
unsigned long first_folio_page_idx;
5253
};
5354

5455
bool io_rsrc_cache_init(struct io_ring_ctx *ctx);

io_uring/zcrx.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,10 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
106106
for_each_sgtable_dma_sg(mem->sgt, sg, i)
107107
total_size += sg_dma_len(sg);
108108

109-
if (total_size < off + len)
110-
return -EINVAL;
109+
if (total_size < off + len) {
110+
ret = -EINVAL;
111+
goto err;
112+
}
111113

112114
mem->dmabuf_offset = off;
113115
mem->size = len;

0 commit comments

Comments
 (0)