
Commit b5d951c

Merge branch 'for-6.11/io_uring' into for-next
* for-6.11/io_uring:
  io_uring/io-wq: make io_wq_work flags atomic
  io_uring: use 'state' consistently
  io_uring/eventfd: move eventfd handling to separate file
  io_uring/eventfd: move to more idiomatic RCU free usage
  io_uring/rsrc: Drop io_copy_iov in favor of iovec API
  io_uring: Drop per-ctx dummy_ubuf

2 parents d20f6b3 + 940b8c4


10 files changed, +213 -208 lines


include/linux/io_uring_types.h

Lines changed: 1 addition & 10 deletions
@@ -50,7 +50,7 @@ struct io_wq_work_list {
 
 struct io_wq_work {
 	struct io_wq_work_node list;
-	unsigned flags;
+	atomic_t flags;
 	/* place it here instead of io_kiocb as it fills padding and saves 4B */
 	int cancel_seq;
 };
@@ -211,14 +211,6 @@ struct io_submit_state {
 	struct blk_plug plug;
 };
 
-struct io_ev_fd {
-	struct eventfd_ctx	*cq_ev_fd;
-	unsigned int		eventfd_async: 1;
-	struct rcu_head		rcu;
-	atomic_t		refs;
-	atomic_t		ops;
-};
-
 struct io_alloc_cache {
 	void **entries;
 	unsigned int nr_cached;
@@ -373,7 +365,6 @@ struct io_ring_ctx {
 	struct io_restriction		restrictions;
 
 	/* slow path rsrc auxilary data, used by update/register */
-	struct io_mapped_ubuf		*dummy_ubuf;
 	struct io_rsrc_data		*file_data;
 	struct io_rsrc_data		*buf_data;
 
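Why the type change matters: an io_wq_work item can be flagged from one task (for example, cancelation setting IO_WQ_WORK_CANCEL) while a worker thread reads or updates the same word, and with a plain unsigned every "work->flags |= bit" is a non-atomic read-modify-write that can silently drop a concurrent update. Below is a minimal userspace analogue of the fix, sketched with C11 stdatomic rather than the kernel's atomic_t API; the struct and names are illustrative only, not kernel code:

#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>

#define WORK_CANCEL	(1u << 0)
#define WORK_HASHED	(1u << 1)

/* userspace stand-in for struct io_wq_work with atomic flags */
struct work {
	atomic_uint flags;
};

static void *canceller(void *arg)
{
	struct work *w = arg;

	/* analogue of atomic_or(IO_WQ_WORK_CANCEL, &work->flags): a
	 * single atomic RMW, so a concurrent updater cannot lose it */
	atomic_fetch_or(&w->flags, WORK_CANCEL);
	return NULL;
}

int main(void)
{
	struct work w;
	pthread_t t;

	atomic_init(&w.flags, 0);
	pthread_create(&t, NULL, canceller, &w);

	/* races with the cancel bit above; with a plain "flags |= bit"
	 * either thread's store could overwrite the other's bit */
	atomic_fetch_or(&w.flags, WORK_HASHED);

	pthread_join(t, NULL);
	printf("flags = 0x%x\n", atomic_load(&w.flags)); /* always 0x3 */
	return 0;
}
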
io_uring/Makefile

Lines changed: 3 additions & 3 deletions
@@ -4,9 +4,9 @@
 
 obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
 					tctx.o filetable.o rw.o net.o poll.o \
-					uring_cmd.o openclose.o sqpoll.o \
-					xattr.o nop.o fs.o splice.o sync.o \
-					msg_ring.o advise.o openclose.o \
+					eventfd.o uring_cmd.o openclose.o \
+					sqpoll.o xattr.o nop.o fs.o splice.o \
+					sync.o msg_ring.o advise.o openclose.o \
 					epoll.o statx.o timeout.o fdinfo.o \
 					cancel.o waitid.o register.o \
 					truncate.o memmap.o

io_uring/eventfd.c

Lines changed: 160 additions & 0 deletions
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/eventfd.h>
+#include <linux/eventpoll.h>
+#include <linux/io_uring.h>
+#include <linux/io_uring_types.h>
+
+#include "io-wq.h"
+#include "eventfd.h"
+
+struct io_ev_fd {
+	struct eventfd_ctx	*cq_ev_fd;
+	unsigned int		eventfd_async: 1;
+	struct rcu_head		rcu;
+	atomic_t		refs;
+	atomic_t		ops;
+};
+
+enum {
+	IO_EVENTFD_OP_SIGNAL_BIT,
+};
+
+static void io_eventfd_free(struct rcu_head *rcu)
+{
+	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+
+	eventfd_ctx_put(ev_fd->cq_ev_fd);
+	kfree(ev_fd);
+}
+
+static void io_eventfd_do_signal(struct rcu_head *rcu)
+{
+	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+
+	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+
+	if (atomic_dec_and_test(&ev_fd->refs))
+		io_eventfd_free(rcu);
+}
+
+void io_eventfd_signal(struct io_ring_ctx *ctx)
+{
+	struct io_ev_fd *ev_fd = NULL;
+
+	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
+		return;
+
+	guard(rcu)();
+
+	/*
+	 * rcu_dereference ctx->io_ev_fd once and use it for both checking
+	 * and eventfd_signal
+	 */
+	ev_fd = rcu_dereference(ctx->io_ev_fd);
+
+	/*
+	 * Check again if ev_fd exists in case an io_eventfd_unregister call
+	 * completed between the NULL check of ctx->io_ev_fd at the start of
+	 * the function and rcu_read_lock.
+	 */
+	if (unlikely(!ev_fd))
+		return;
+	if (!atomic_inc_not_zero(&ev_fd->refs))
+		return;
+	if (ev_fd->eventfd_async && !io_wq_current_is_worker())
+		goto out;
+
+	if (likely(eventfd_signal_allowed())) {
+		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+	} else {
+		if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
+			call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
+			return;
+		}
+	}
+out:
+	if (atomic_dec_and_test(&ev_fd->refs))
+		call_rcu(&ev_fd->rcu, io_eventfd_free);
+}
+
+void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
+{
+	bool skip;
+
+	spin_lock(&ctx->completion_lock);
+
+	/*
+	 * Eventfd should only get triggered when at least one event has been
+	 * posted. Some applications rely on the eventfd notification count
+	 * only changing IFF a new CQE has been added to the CQ ring. There's
+	 * no dependency on 1:1 relationship between how many times this
+	 * function is called (and hence the eventfd count) and number of CQEs
+	 * posted to the CQ ring.
+	 */
+	skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
+	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+	spin_unlock(&ctx->completion_lock);
+	if (skip)
+		return;
+
+	io_eventfd_signal(ctx);
+}
+
+int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
+			unsigned int eventfd_async)
+{
+	struct io_ev_fd *ev_fd;
+	__s32 __user *fds = arg;
+	int fd;
+
+	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
+					lockdep_is_held(&ctx->uring_lock));
+	if (ev_fd)
+		return -EBUSY;
+
+	if (copy_from_user(&fd, fds, sizeof(*fds)))
+		return -EFAULT;
+
+	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
+	if (!ev_fd)
+		return -ENOMEM;
+
+	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
+	if (IS_ERR(ev_fd->cq_ev_fd)) {
+		int ret = PTR_ERR(ev_fd->cq_ev_fd);
+
+		kfree(ev_fd);
+		return ret;
+	}
+
+	spin_lock(&ctx->completion_lock);
+	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
+	spin_unlock(&ctx->completion_lock);
+
+	ev_fd->eventfd_async = eventfd_async;
+	ctx->has_evfd = true;
+	atomic_set(&ev_fd->refs, 1);
+	atomic_set(&ev_fd->ops, 0);
+	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
+	return 0;
+}
+
+int io_eventfd_unregister(struct io_ring_ctx *ctx)
+{
+	struct io_ev_fd *ev_fd;
+
+	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
+					lockdep_is_held(&ctx->uring_lock));
+	if (ev_fd) {
+		ctx->has_evfd = false;
+		rcu_assign_pointer(ctx->io_ev_fd, NULL);
+		if (atomic_dec_and_test(&ev_fd->refs))
+			call_rcu(&ev_fd->rcu, io_eventfd_free);
+		return 0;
+	}
+
+	return -ENXIO;
+}
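
For context on how the code above is driven from userspace: an application registers an eventfd against the ring with IORING_REGISTER_EVENTFD, which lands in io_eventfd_register(), and the kernel then bumps the eventfd count as CQEs are posted via io_eventfd_signal(). A minimal sketch using liburing's wrappers (assumes liburing is available; error handling omitted for brevity):

#include <liburing.h>
#include <sys/eventfd.h>
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	uint64_t count;
	int efd;

	io_uring_queue_init(8, &ring, 0);

	/* eventfd the kernel will signal when CQEs are posted; this is
	 * the fd that ends up behind ctx->io_ev_fd on the kernel side */
	efd = eventfd(0, 0);
	io_uring_register_eventfd(&ring, efd);

	/* post one CQE via a no-op request */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	io_uring_submit(&ring);

	/* blocks until the eventfd has been signalled for the CQE */
	read(efd, &count, sizeof(count));
	printf("eventfd count: %llu\n", (unsigned long long)count);

	io_uring_wait_cqe(&ring, &cqe);
	io_uring_cqe_seen(&ring, cqe);

	io_uring_unregister_eventfd(&ring);
	close(efd);
	io_uring_queue_exit(&ring);
	return 0;
}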

io_uring/eventfd.h

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+
+struct io_ring_ctx;
+int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
+			unsigned int eventfd_async);
+int io_eventfd_unregister(struct io_ring_ctx *ctx);
+
+void io_eventfd_flush_signal(struct io_ring_ctx *ctx);
+void io_eventfd_signal(struct io_ring_ctx *ctx);

io_uring/io-wq.c

Lines changed: 10 additions & 9 deletions
@@ -159,7 +159,7 @@ static inline struct io_wq_acct *io_get_acct(struct io_wq *wq, bool bound)
 static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq,
 						  struct io_wq_work *work)
 {
-	return io_get_acct(wq, !(work->flags & IO_WQ_WORK_UNBOUND));
+	return io_get_acct(wq, !(atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND));
 }
 
 static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker)
@@ -451,7 +451,7 @@ static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker)
 
 static inline unsigned int io_get_work_hash(struct io_wq_work *work)
 {
-	return work->flags >> IO_WQ_HASH_SHIFT;
+	return atomic_read(&work->flags) >> IO_WQ_HASH_SHIFT;
 }
 
 static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash)
@@ -592,8 +592,9 @@ static void io_worker_handle_work(struct io_wq_acct *acct,
 
 		next_hashed = wq_next_work(work);
 
-		if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND))
-			work->flags |= IO_WQ_WORK_CANCEL;
+		if (do_kill &&
+		    (atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND))
+			atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
 		wq->do_work(work);
 		io_assign_current_work(worker, NULL);
 
@@ -891,7 +892,7 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
 {
 	do {
-		work->flags |= IO_WQ_WORK_CANCEL;
+		atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
 		wq->do_work(work);
 		work = wq->free_work(work);
 	} while (work);
@@ -926,7 +927,7 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
 {
 	struct io_wq_acct *acct = io_work_get_acct(wq, work);
-	unsigned long work_flags = work->flags;
+	unsigned int work_flags = atomic_read(&work->flags);
 	struct io_cb_cancel_data match = {
 		.fn		= io_wq_work_match_item,
 		.data		= work,
@@ -939,7 +940,7 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
 	 * been marked as one that should not get executed, cancel it here.
 	 */
 	if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
-	    (work->flags & IO_WQ_WORK_CANCEL)) {
+	    (work_flags & IO_WQ_WORK_CANCEL)) {
 		io_run_cancel(work, wq);
 		return;
 	}
@@ -982,15 +983,15 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
 	unsigned int bit;
 
 	bit = hash_ptr(val, IO_WQ_HASH_ORDER);
-	work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
+	atomic_or(IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT), &work->flags);
 }
 
 static bool __io_wq_worker_cancel(struct io_worker *worker,
 				  struct io_cb_cancel_data *match,
 				  struct io_wq_work *work)
 {
 	if (work && match->fn(work, match->data)) {
-		work->flags |= IO_WQ_WORK_CANCEL;
+		atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
 		__set_notify_signal(worker->task);
 		return true;
 	}
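
Two small points in the io_wq_enqueue() hunk above: work_flags becomes unsigned int to match the 32-bit value held by an atomic_t, and the flags word is now read once into a local so the cancel test operates on a stable snapshot instead of re-reading a value another task may be changing. A small C11 analogue of that snapshot pattern (illustrative names, not kernel code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define WORK_CANCEL	(1u << 0)

struct work {
	atomic_uint flags;
};

/* modelled on the enqueue-time check: take one snapshot of the
 * flags, then test the local copy; re-reading the atomic at every
 * use could observe different values within a single decision */
static bool enqueue_should_cancel(struct work *w)
{
	unsigned int work_flags = atomic_load(&w->flags);

	return work_flags & WORK_CANCEL;
}

int main(void)
{
	struct work w;

	atomic_init(&w.flags, WORK_CANCEL);
	printf("cancel at enqueue: %d\n", enqueue_should_cancel(&w));
	return 0;
}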

io_uring/io-wq.h

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ bool io_wq_worker_stopped(void);
 
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
-	return work->flags & IO_WQ_WORK_HASHED;
+	return atomic_read(&work->flags) & IO_WQ_WORK_HASHED;
 }
 
 typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
