Skip to content

Commit c15ffe9

Browse files
committed
Merge branch 'for-6.11/io_uring' into for-next
* for-6.11/io_uring:
  io_uring/msg_ring: add an alloc cache for io_kiocb entries
  io_uring/msg_ring: improve handling of target CQE posting
  io_uring: add io_add_aux_cqe() helper
  io_uring: add remote task_work execution helper
  io_uring/msg_ring: tighten requirement for remote posting
2 parents 0902a1e + 50cf5f3 commit c15ffe9

File tree

5 files changed

+124
-55
lines changed

5 files changed

+124
-55
lines changed

include/linux/io_uring_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,9 @@ struct io_ring_ctx {
397397
struct callback_head poll_wq_task_work;
398398
struct list_head defer_list;
399399

400+
struct io_alloc_cache msg_cache;
401+
spinlock_t msg_lock;
402+
400403
#ifdef CONFIG_NET_RX_BUSY_POLL
401404
struct list_head napi_list; /* track busy poll napi_id */
402405
spinlock_t napi_lock; /* napi_list lock */

io_uring/io_uring.c

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
#include "futex.h"
9696
#include "napi.h"
9797
#include "uring_cmd.h"
98+
#include "msg_ring.h"
9899
#include "memmap.h"
99100

100101
#include "timeout.h"
@@ -315,6 +316,9 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
315316
sizeof(struct io_async_rw));
316317
ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX,
317318
sizeof(struct uring_cache));
319+
spin_lock_init(&ctx->msg_lock);
320+
ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
321+
sizeof(struct io_kiocb));
318322
ret |= io_futex_cache_init(ctx);
319323
if (ret)
320324
goto err;
@@ -351,6 +355,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
351355
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
352356
io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
353357
io_alloc_cache_free(&ctx->uring_cache, kfree);
358+
io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
354359
io_futex_cache_free(ctx);
355360
kfree(ctx->cancel_table.hbs);
356361
kfree(ctx->cancel_table_locked.hbs);
@@ -801,19 +806,38 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
801806
return false;
802807
}
803808

804-
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
809+
static bool __io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res,
810+
u32 cflags)
805811
{
806812
bool filled;
807813

808-
io_cq_lock(ctx);
809814
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
810815
if (!filled)
811816
filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
812817

818+
return filled;
819+
}
820+
821+
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
822+
{
823+
bool filled;
824+
825+
io_cq_lock(ctx);
826+
filled = __io_post_aux_cqe(ctx, user_data, res, cflags);
813827
io_cq_unlock_post(ctx);
814828
return filled;
815829
}
816830

831+
/*
832+
* Must be called from inline task_work so we know a flush will happen later,
833+
* and obviously with ctx->uring_lock held (tw always has that).
834+
*/
835+
void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
836+
{
837+
__io_post_aux_cqe(ctx, user_data, res, cflags);
838+
ctx->submit_state.cq_flush = true;
839+
}
840+
817841
/*
818842
* A helper for multishot requests posting additional CQEs.
819843
* Should only be used from a task_work including IO_URING_F_MULTISHOT.
@@ -1098,9 +1122,10 @@ void tctx_task_work(struct callback_head *cb)
10981122
WARN_ON_ONCE(ret);
10991123
}
11001124

1101-
static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
1125+
static inline void io_req_local_work_add(struct io_kiocb *req,
1126+
struct io_ring_ctx *ctx,
1127+
unsigned flags)
11021128
{
1103-
struct io_ring_ctx *ctx = req->ctx;
11041129
unsigned nr_wait, nr_tw, nr_tw_prev;
11051130
struct llist_node *head;
11061131

@@ -1114,6 +1139,8 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
11141139
if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK))
11151140
flags &= ~IOU_F_TWQ_LAZY_WAKE;
11161141

1142+
guard(rcu)();
1143+
11171144
head = READ_ONCE(ctx->work_llist.first);
11181145
do {
11191146
nr_tw_prev = 0;
@@ -1195,13 +1222,18 @@ static void io_req_normal_work_add(struct io_kiocb *req)
11951222

11961223
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
11971224
{
1198-
if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
1199-
rcu_read_lock();
1200-
io_req_local_work_add(req, flags);
1201-
rcu_read_unlock();
1202-
} else {
1225+
if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN)
1226+
io_req_local_work_add(req, req->ctx, flags);
1227+
else
12031228
io_req_normal_work_add(req);
1204-
}
1229+
}
1230+
1231+
void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
1232+
unsigned flags)
1233+
{
1234+
if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)))
1235+
return;
1236+
io_req_local_work_add(req, ctx, flags);
12051237
}
12061238

12071239
static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
@@ -2572,6 +2604,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
25722604
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
25732605
io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
25742606
io_alloc_cache_free(&ctx->uring_cache, kfree);
2607+
io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
25752608
io_futex_cache_free(ctx);
25762609
io_destroy_buffers(ctx);
25772610
mutex_unlock(&ctx->uring_lock);

io_uring/io_uring.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow);
6565
int io_run_task_work_sig(struct io_ring_ctx *ctx);
6666
void io_req_defer_failed(struct io_kiocb *req, s32 res);
6767
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
68+
void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
6869
bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags);
6970
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
7071

@@ -73,6 +74,8 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
7374
unsigned issue_flags);
7475

7576
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
77+
void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
78+
unsigned flags);
7679
bool io_alloc_async_data(struct io_kiocb *req);
7780
void io_req_task_queue(struct io_kiocb *req);
7881
void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);

io_uring/msg_ring.c

Lines changed: 74 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
#include "io_uring.h"
1212
#include "rsrc.h"
1313
#include "filetable.h"
14+
#include "alloc_cache.h"
1415
#include "msg_ring.h"
1516

16-
1717
/* All valid masks for MSG_RING */
1818
#define IORING_MSG_RING_MASK (IORING_MSG_RING_CQE_SKIP | \
1919
IORING_MSG_RING_FLAGS_PASS)
@@ -68,59 +68,65 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
6868

6969
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
7070
{
71-
if (!target_ctx->task_complete)
72-
return false;
73-
return current != target_ctx->submitter_task;
71+
return target_ctx->task_complete;
7472
}
7573

76-
static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
74+
static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts)
7775
{
78-
struct io_ring_ctx *ctx = req->file->private_data;
79-
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
80-
struct task_struct *task = READ_ONCE(ctx->submitter_task);
76+
struct io_ring_ctx *ctx = req->ctx;
8177

82-
if (unlikely(!task))
83-
return -EOWNERDEAD;
78+
io_add_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, req->cqe.flags);
79+
if (spin_trylock(&ctx->msg_lock)) {
80+
if (io_alloc_cache_put(&ctx->msg_cache, req))
81+
req = NULL;
82+
spin_unlock(&ctx->msg_lock);
83+
}
84+
if (req)
85+
kfree(req);
86+
percpu_ref_put(&ctx->refs);
87+
}
8488

85-
init_task_work(&msg->tw, func);
86-
if (task_work_add(task, &msg->tw, TWA_SIGNAL))
87-
return -EOWNERDEAD;
89+
static void io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
90+
int res, u32 cflags, u64 user_data)
91+
{
92+
req->cqe.user_data = user_data;
93+
io_req_set_res(req, res, cflags);
94+
percpu_ref_get(&ctx->refs);
95+
req->ctx = ctx;
96+
req->task = READ_ONCE(ctx->submitter_task);
97+
req->io_task_work.func = io_msg_tw_complete;
98+
io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE);
99+
}
88100

89-
return IOU_ISSUE_SKIP_COMPLETE;
101+
static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
102+
{
103+
struct io_kiocb *req = NULL;
104+
105+
if (spin_trylock(&ctx->msg_lock)) {
106+
req = io_alloc_cache_get(&ctx->msg_cache);
107+
spin_unlock(&ctx->msg_lock);
108+
}
109+
if (req)
110+
return req;
111+
return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN);
90112
}
91113

92-
static void io_msg_tw_complete(struct callback_head *head)
114+
static int io_msg_data_remote(struct io_kiocb *req)
93115
{
94-
struct io_msg *msg = container_of(head, struct io_msg, tw);
95-
struct io_kiocb *req = cmd_to_io_kiocb(msg);
96116
struct io_ring_ctx *target_ctx = req->file->private_data;
97-
int ret = 0;
98-
99-
if (current->flags & PF_EXITING) {
100-
ret = -EOWNERDEAD;
101-
} else {
102-
u32 flags = 0;
103-
104-
if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
105-
flags = msg->cqe_flags;
106-
107-
/*
108-
* If the target ring is using IOPOLL mode, then we need to be
109-
* holding the uring_lock for posting completions. Other ring
110-
* types rely on the regular completion locking, which is
111-
* handled while posting.
112-
*/
113-
if (target_ctx->flags & IORING_SETUP_IOPOLL)
114-
mutex_lock(&target_ctx->uring_lock);
115-
if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
116-
ret = -EOVERFLOW;
117-
if (target_ctx->flags & IORING_SETUP_IOPOLL)
118-
mutex_unlock(&target_ctx->uring_lock);
119-
}
117+
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
118+
struct io_kiocb *target;
119+
u32 flags = 0;
120120

121-
if (ret < 0)
122-
req_set_fail(req);
123-
io_req_queue_tw_complete(req, ret);
121+
target = io_msg_get_kiocb(req->ctx);
122+
if (unlikely(!target))
123+
return -ENOMEM;
124+
125+
if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
126+
flags = msg->cqe_flags;
127+
128+
io_msg_remote_post(target_ctx, target, msg->len, flags, msg->user_data);
129+
return 0;
124130
}
125131

126132
static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
@@ -138,7 +144,7 @@ static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
138144
return -EBADFD;
139145

140146
if (io_msg_need_remote(target_ctx))
141-
return io_msg_exec_remote(req, io_msg_tw_complete);
147+
return io_msg_data_remote(req);
142148

143149
if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
144150
flags = msg->cqe_flags;
@@ -218,6 +224,22 @@ static void io_msg_tw_fd_complete(struct callback_head *head)
218224
io_req_queue_tw_complete(req, ret);
219225
}
220226

227+
static int io_msg_fd_remote(struct io_kiocb *req)
228+
{
229+
struct io_ring_ctx *ctx = req->file->private_data;
230+
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
231+
struct task_struct *task = READ_ONCE(ctx->submitter_task);
232+
233+
if (unlikely(!task))
234+
return -EOWNERDEAD;
235+
236+
init_task_work(&msg->tw, io_msg_tw_fd_complete);
237+
if (task_work_add(task, &msg->tw, TWA_SIGNAL))
238+
return -EOWNERDEAD;
239+
240+
return IOU_ISSUE_SKIP_COMPLETE;
241+
}
242+
221243
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
222244
{
223245
struct io_ring_ctx *target_ctx = req->file->private_data;
@@ -240,7 +262,7 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
240262
}
241263

242264
if (io_msg_need_remote(target_ctx))
243-
return io_msg_exec_remote(req, io_msg_tw_fd_complete);
265+
return io_msg_fd_remote(req);
244266
return io_msg_install_complete(req, issue_flags);
245267
}
246268

@@ -294,3 +316,10 @@ int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
294316
io_req_set_res(req, ret, 0);
295317
return IOU_OK;
296318
}
319+
320+
void io_msg_cache_free(const void *entry)
321+
{
322+
struct io_kiocb *req = (struct io_kiocb *) entry;
323+
324+
kmem_cache_free(req_cachep, req);
325+
}

io_uring/msg_ring.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
44
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags);
55
void io_msg_ring_cleanup(struct io_kiocb *req);
6+
void io_msg_cache_free(const void *entry);

0 commit comments

Comments (0)