Commit eff5f16

Merge tag 'for-6.15/io_uring-reg-vec-20250327' of git://git.kernel.dk/linux
Pull more io_uring updates from Jens Axboe:
 "Final separate updates for io_uring. This started out as a series of
  cleanups and improvements for registered buffers, but as the last
  series of the io_uring changes for 6.15, it also collected a few fixes
  for the other branches on top:

   - Add support for vectored fixed/registered buffers. Previously only
     single segments have been supported for commands; now vectored
     variants are supported as well. This series includes networking and
     file read/write support.

   - Small series unifying return codes across multi and single shot.

   - Small series cleaning up registered buffer importing.

   - Adding support for vectored registered buffers for uring_cmd.

   - Fix for io-wq handling of command reissue.

   - Various little fixes and tweaks"

* tag 'for-6.15/io_uring-reg-vec-20250327' of git://git.kernel.dk/linux: (25 commits)
  io_uring/net: fix io_req_post_cqe abuse by send bundle
  io_uring/net: use REQ_F_IMPORT_BUFFER for send_zc
  io_uring: move min_events sanitisation
  io_uring: rename "min" arg in io_iopoll_check()
  io_uring: open code __io_post_aux_cqe()
  io_uring: defer iowq cqe overflow via task_work
  io_uring: fix retry handling off iowq
  io_uring/net: only import send_zc buffer once
  io_uring/cmd: introduce io_uring_cmd_import_fixed_vec
  io_uring/cmd: add iovec cache for commands
  io_uring/cmd: don't expose entire cmd async data
  io_uring: rename the data cmd cache
  io_uring: rely on io_prep_reg_vec for iovec placement
  io_uring: introduce io_prep_reg_iovec()
  io_uring: unify STOP_MULTISHOT with IOU_OK
  io_uring: return -EAGAIN to continue multishot
  io_uring: cap cached iovec/bvec size
  io_uring/net: implement vectored reg bufs for zctx
  io_uring/net: convert to struct iou_vec
  io_uring/net: pull vec alloc out of msghdr import
  ...
2 parents: 6df9d08 + 6889ae1

File tree

16 files changed: +567, -211 lines


include/linux/io_uring/cmd.h

Lines changed: 13 additions & 0 deletions

@@ -43,6 +43,11 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
 			      struct iov_iter *iter,
 			      struct io_uring_cmd *ioucmd,
 			      unsigned int issue_flags);
+int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
+				  const struct iovec __user *uvec,
+				  size_t uvec_segs,
+				  int ddir, struct iov_iter *iter,
+				  unsigned issue_flags);
 
 /*
  * Completes the request, i.e. posts an io_uring CQE and deallocates @ioucmd
@@ -76,6 +81,14 @@ io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
 {
 	return -EOPNOTSUPP;
 }
+static inline int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
+						const struct iovec __user *uvec,
+						size_t uvec_segs,
+						int ddir, struct iov_iter *iter,
+						unsigned issue_flags)
+{
+	return -EOPNOTSUPP;
+}
 static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
 				     u64 ret2, unsigned issue_flags)
 {
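For driver authors, a minimal sketch of how a ->uring_cmd handler might use the new vectored import. io_uring_cmd_import_fixed_vec(), io_uring_sqe_cmd() and u64_to_user_ptr() are real; struct my_cmd, its fields, and my_drv_do_read() are hypothetical stand-ins for driver specifics:

static int my_drv_uring_cmd(struct io_uring_cmd *ioucmd,
			    unsigned int issue_flags)
{
	/* driver-defined layout of the SQE command payload */
	const struct my_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
	const struct iovec __user *uvec = u64_to_user_ptr(cmd->iovec_addr);
	struct iov_iter iter;
	int ret;

	/*
	 * Resolve the user iovec array against the request's registered
	 * buffer and build an iov_iter over it.
	 */
	ret = io_uring_cmd_import_fixed_vec(ioucmd, uvec, cmd->nr_segs,
					    READ, &iter, issue_flags);
	if (ret)
		return ret;
	return my_drv_do_read(ioucmd, &iter);
}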

include/linux/io_uring_types.h

Lines changed: 18 additions & 1 deletion

@@ -110,6 +110,14 @@ struct io_uring_task {
 	} ____cacheline_aligned_in_smp;
 };
 
+struct iou_vec {
+	union {
+		struct iovec	*iovec;
+		struct bio_vec	*bvec;
+	};
+	unsigned	nr; /* number of struct iovec it can hold */
+};
+
 struct io_uring {
 	u32 head;
 	u32 tail;
@@ -310,7 +318,7 @@ struct io_ring_ctx {
 	struct io_alloc_cache	apoll_cache;
 	struct io_alloc_cache	netmsg_cache;
 	struct io_alloc_cache	rw_cache;
-	struct io_alloc_cache	uring_cache;
+	struct io_alloc_cache	cmd_cache;
 
 	/*
 	 * Any cancelable uring_cmd is added to this list in
@@ -482,6 +490,7 @@ enum {
 	REQ_F_SKIP_LINK_CQES_BIT,
 	REQ_F_SINGLE_POLL_BIT,
 	REQ_F_DOUBLE_POLL_BIT,
+	REQ_F_MULTISHOT_BIT,
 	REQ_F_APOLL_MULTISHOT_BIT,
 	REQ_F_CLEAR_POLLIN_BIT,
 	/* keep async read/write and isreg together and in order */
@@ -494,6 +503,7 @@ enum {
 	REQ_F_BUFFERS_COMMIT_BIT,
 	REQ_F_BUF_NODE_BIT,
 	REQ_F_HAS_METADATA_BIT,
+	REQ_F_IMPORT_BUFFER_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
@@ -558,6 +568,8 @@ enum {
 	REQ_F_SINGLE_POLL	= IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT),
 	/* double poll may active */
 	REQ_F_DOUBLE_POLL	= IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT),
+	/* request posts multiple completions, should be set at prep time */
+	REQ_F_MULTISHOT		= IO_REQ_FLAG(REQ_F_MULTISHOT_BIT),
 	/* fast poll multishot mode */
 	REQ_F_APOLL_MULTISHOT	= IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT),
 	/* recvmsg special flag, clear EPOLLIN */
@@ -576,6 +588,11 @@ enum {
 	REQ_F_BUF_NODE		= IO_REQ_FLAG(REQ_F_BUF_NODE_BIT),
 	/* request has read/write metadata assigned */
 	REQ_F_HAS_METADATA	= IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT),
+	/*
+	 * For vectored fixed buffers, resolve iovec to registered buffers.
+	 * For SEND_ZC, whether to import buffers (i.e. the first issue).
+	 */
+	REQ_F_IMPORT_BUFFER	= IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT),
 };
 
 typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw);
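The union is the point of struct iou_vec: one allocation backs both views, so an iovec array imported from userspace can later be rewritten in place as the bio_vecs of a resolved registered buffer. A minimal sketch of the sizing idea (the helper name is invented; the real code lives behind io_prep_reg_vec() and friends and additionally caps what the cache may hold):

static int iou_vec_alloc_sketch(struct iou_vec *iv, unsigned nr)
{
	/* size each slot for the larger view so either fits in place */
	size_t unit = max(sizeof(struct iovec), sizeof(struct bio_vec));

	iv->iovec = kmalloc_array(nr, unit, GFP_KERNEL);
	if (!iv->iovec)
		return -ENOMEM;
	iv->nr = nr;
	return 0;
}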

include/uapi/linux/io_uring.h

Lines changed: 2 additions & 0 deletions

@@ -281,6 +281,8 @@ enum io_uring_op {
 	IORING_OP_LISTEN,
 	IORING_OP_RECV_ZC,
 	IORING_OP_EPOLL_WAIT,
+	IORING_OP_READV_FIXED,
+	IORING_OP_WRITEV_FIXED,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
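From userspace these behave like READV/WRITEV, except that each iovec must fall within a buffer previously registered with the ring. A hedged sketch using liburing's generic prep helper; the field usage (iovec array in addr/len, registered buffer selected by buf_index) is inferred from the readv prep path, and later liburing releases may add dedicated helpers:

/* scatter a read across iov[0..nr_vecs) slices of registered buffer 0 */
struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

io_uring_prep_rw(IORING_OP_READV_FIXED, sqe, fd, iov, nr_vecs, 0);
sqe->buf_index = 0;	/* index into the registered buffer table */
io_uring_submit(&ring);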

io_uring/alloc_cache.h

Lines changed: 0 additions & 9 deletions

@@ -16,15 +16,6 @@ bool io_alloc_cache_init(struct io_alloc_cache *cache,
 
 void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp);
 
-static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr)
-{
-	if (IS_ENABLED(CONFIG_KASAN)) {
-		kfree(*iov);
-		*iov = NULL;
-		*nr = 0;
-	}
-}
-
 static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
 				      void *entry)
 {

io_uring/io_uring.c

Lines changed: 27 additions & 38 deletions

@@ -289,7 +289,7 @@ static void io_free_alloc_caches(struct io_ring_ctx *ctx)
 	io_alloc_cache_free(&ctx->apoll_cache, kfree);
 	io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
 	io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
-	io_alloc_cache_free(&ctx->uring_cache, kfree);
+	io_alloc_cache_free(&ctx->cmd_cache, io_cmd_cache_free);
 	io_alloc_cache_free(&ctx->msg_cache, kfree);
 	io_futex_cache_free(ctx);
 	io_rsrc_cache_free(ctx);
@@ -334,8 +334,9 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX,
 				   sizeof(struct io_async_rw),
 				   offsetof(struct io_async_rw, clear));
-	ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX,
-				   sizeof(struct io_uring_cmd_data), 0);
+	ret |= io_alloc_cache_init(&ctx->cmd_cache, IO_ALLOC_CACHE_MAX,
+				   sizeof(struct io_async_cmd),
+				   sizeof(struct io_async_cmd));
 	spin_lock_init(&ctx->msg_lock);
 	ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
 				   sizeof(struct io_kiocb), 0);
@@ -833,24 +834,14 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
 	return false;
 }
 
-static bool __io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res,
-			      u32 cflags)
+bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
 {
 	bool filled;
 
+	io_cq_lock(ctx);
 	filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
 	if (!filled)
 		filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
-
-	return filled;
-}
-
-bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
-{
-	bool filled;
-
-	io_cq_lock(ctx);
-	filled = __io_post_aux_cqe(ctx, user_data, res, cflags);
 	io_cq_unlock_post(ctx);
 	return filled;
 }
@@ -891,6 +882,7 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
 static void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	bool completed = true;
 
 	/*
 	 * All execution paths but io-wq use the deferred completions by
@@ -903,19 +895,21 @@ static void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
 	 * Handle special CQ sync cases via task_work. DEFER_TASKRUN requires
 	 * the submitter task context, IOPOLL protects with uring_lock.
 	 */
-	if (ctx->lockless_cq) {
+	if (ctx->lockless_cq || (req->flags & REQ_F_REISSUE)) {
+defer_complete:
 		req->io_task_work.func = io_req_task_complete;
 		io_req_task_work_add(req);
 		return;
 	}
 
 	io_cq_lock(ctx);
-	if (!(req->flags & REQ_F_CQE_SKIP)) {
-		if (!io_fill_cqe_req(ctx, req))
-			io_req_cqe_overflow(req);
-	}
+	if (!(req->flags & REQ_F_CQE_SKIP))
+		completed = io_fill_cqe_req(ctx, req);
 	io_cq_unlock_post(ctx);
 
+	if (!completed)
+		goto defer_complete;
+
 	/*
 	 * We don't free the request here because we know it's called from
 	 * io-wq only, which holds a reference, so it cannot be the last put.
@@ -1511,11 +1505,13 @@ static __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
 	mutex_unlock(&ctx->uring_lock);
 }
 
-static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events)
 {
 	unsigned int nr_events = 0;
 	unsigned long check_cq;
 
+	min_events = min(min_events, ctx->cq_entries);
+
 	lockdep_assert_held(&ctx->uring_lock);
 
 	if (!io_allowed_run_tw(ctx))
@@ -1557,7 +1553,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 		    io_task_work_pending(ctx)) {
 			u32 tail = ctx->cached_cq_tail;
 
-			(void) io_run_local_work_locked(ctx, min);
+			(void) io_run_local_work_locked(ctx, min_events);
 
 			if (task_work_pending(current) ||
 			    wq_list_empty(&ctx->iopoll_list)) {
@@ -1570,7 +1566,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 		    wq_list_empty(&ctx->iopoll_list))
 			break;
 		}
-		ret = io_do_iopoll(ctx, !min);
+		ret = io_do_iopoll(ctx, !min_events);
 		if (unlikely(ret < 0))
 			return ret;
 
@@ -1580,7 +1576,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 			break;
 
 		nr_events += ret;
-	} while (nr_events < min);
+	} while (nr_events < min_events);
 
 	return 0;
 }
@@ -1791,10 +1787,7 @@ int io_poll_issue(struct io_kiocb *req, io_tw_token_t tw)
 
 	ret = __io_issue_sqe(req, issue_flags, &io_issue_defs[req->opcode]);
 
-	WARN_ON_ONCE(ret == IOU_OK);
-
-	if (ret == IOU_ISSUE_SKIP_COMPLETE)
-		ret = 0;
+	WARN_ON_ONCE(ret == IOU_ISSUE_SKIP_COMPLETE);
 	return ret;
 }
 
@@ -1847,7 +1840,7 @@ void io_wq_submit_work(struct io_wq_work *work)
 	 * Don't allow any multishot execution from io-wq. It's more restrictive
 	 * than necessary and also cleaner.
 	 */
-	if (req->flags & REQ_F_APOLL_MULTISHOT) {
+	if (req->flags & (REQ_F_MULTISHOT|REQ_F_APOLL_MULTISHOT)) {
 		err = -EBADFD;
 		if (!io_file_can_poll(req))
 			goto fail;
@@ -1858,7 +1851,7 @@ void io_wq_submit_work(struct io_wq_work *work)
 			goto fail;
 		return;
 	} else {
-		req->flags &= ~REQ_F_APOLL_MULTISHOT;
+		req->flags &= ~(REQ_F_APOLL_MULTISHOT|REQ_F_MULTISHOT);
 	}
 }
 
@@ -2549,6 +2542,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 	ktime_t start_time;
 	int ret;
 
+	min_events = min_t(int, min_events, ctx->cq_entries);
+
 	if (!io_allowed_run_tw(ctx))
 		return -EEXIST;
 	if (io_local_work_pending(ctx))
@@ -3435,22 +3430,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		mutex_lock(&ctx->uring_lock);
 iopoll_locked:
 		ret2 = io_validate_ext_arg(ctx, flags, argp, argsz);
-		if (likely(!ret2)) {
-			min_complete = min(min_complete,
-					   ctx->cq_entries);
+		if (likely(!ret2))
 			ret2 = io_iopoll_check(ctx, min_complete);
-		}
 		mutex_unlock(&ctx->uring_lock);
 	} else {
 		struct ext_arg ext_arg = { .argsz = argsz };
 
 		ret2 = io_get_ext_arg(ctx, flags, argp, &ext_arg);
-		if (likely(!ret2)) {
-			min_complete = min(min_complete,
-					   ctx->cq_entries);
+		if (likely(!ret2))
 			ret2 = io_cqring_wait(ctx, min_complete, flags,
 					      &ext_arg);
-		}
 	}
 
 	if (!ret) {
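Moving the min_events sanitisation into the waiters keeps the old syscall behaviour: waiting for more completions than the CQ ring can hold can never be satisfied, so the count is clamped to cq_entries. A hedged userspace illustration via liburing's raw enter wrapper (values illustrative):

/* with cq_entries == 128, this waits for at most 128 CQEs, not 200 */
io_uring_enter(ring_fd, 0, 200, IORING_ENTER_GETEVENTS, NULL);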

io_uring/io_uring.h

Lines changed: 11 additions & 8 deletions

@@ -19,22 +19,25 @@
 #endif
 
 enum {
-	IOU_OK			= 0,
+	IOU_OK			= 0, /* deprecated, use IOU_COMPLETE */
+	IOU_COMPLETE		= 0,
+
 	IOU_ISSUE_SKIP_COMPLETE	= -EIOCBQUEUED,
 
+	/*
+	 * The request has more work to do and should be retried. io_uring will
+	 * attempt to wait on the file for eligible opcodes, but otherwise
+	 * it'll be handed to iowq for blocking execution. It works for normal
+	 * requests as well as for the multi shot mode.
+	 */
+	IOU_RETRY		= -EAGAIN,
+
 	/*
 	 * Requeue the task_work to restart operations on this request. The
 	 * actual value isn't important, should just be not an otherwise
 	 * valid error code, yet less than -MAX_ERRNO and valid internally.
 	 */
 	IOU_REQUEUE		= -3072,
-
-	/*
-	 * Intended only when both IO_URING_F_MULTISHOT is passed
-	 * to indicate to the poll runner that multishot should be
-	 * removed and the result is set on req->cqe.res.
-	 */
-	IOU_STOP_MULTISHOT	= -ECANCELED,
 };
 
 struct io_wait_queue {
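The practical effect of the return-code unification: a handler no longer signals "stop multishot" with a dedicated code. It posts a CQE per event and returns IOU_RETRY to stay armed, or sets the final result and returns IOU_COMPLETE. A hypothetical handler skeleton (do_one_event() is invented; io_req_post_cqe() and io_req_set_res() are real helpers, the former touched by this merge):

static int my_multishot_handler(struct io_kiocb *req, unsigned int issue_flags)
{
	int ret = do_one_event(req, issue_flags);

	/* more data may follow: post a CQE flagged MORE and re-arm */
	if (ret > 0 && io_req_post_cqe(req, ret, IORING_CQE_F_MORE))
		return IOU_RETRY;

	/* error or terminal event: the core posts the final CQE */
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}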
