Skip to content

Commit 6df9d08

Browse files
committed
Merge tag 'for-6.15/io_uring-epoll-wait-20250325' of git://git.kernel.dk/linux
Pull io_uring epoll support from Jens Axboe: "This adds support for reading epoll events via io_uring. While this may seem counter-intuitive (and/or counter-productive), the reasoning here is that quite a few existing epoll event loops can easily do a partial conversion to a completion based model, but are still stuck with one (or few) event types that remain readiness based. For that case, they then need to add the io_uring fd to the epoll context, and continue to rely on epoll_wait(2) for waiting on events. This misses out on the finer grained waiting that io_uring can do, to reduce context switches and wait for multiple events in one batch reliably. With adding support for reaping epoll events via io_uring, the whole legacy readiness based event types can still be reaped via epoll, with the overall waiting in the loop being driven by io_uring." * tag 'for-6.15/io_uring-epoll-wait-20250325' of git://git.kernel.dk/linux: io_uring/epoll: add support for IORING_OP_EPOLL_WAIT; io_uring/epoll: remove CONFIG_EPOLL guards
2 parents ca0b04b + 19f7e94 commit 6df9d08

File tree

5 files changed

+55
-6
lines changed

5 files changed

+55
-6
lines changed

include/uapi/linux/io_uring.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ enum io_uring_op {
280280
IORING_OP_BIND,
281281
IORING_OP_LISTEN,
282282
IORING_OP_RECV_ZC,
283+
IORING_OP_EPOLL_WAIT,
283284

284285
/* this goes last, obviously */
285286
IORING_OP_LAST,

io_uring/Makefile

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
1111
eventfd.o uring_cmd.o openclose.o \
1212
sqpoll.o xattr.o nop.o fs.o splice.o \
1313
sync.o msg_ring.o advise.o openclose.o \
14-
epoll.o statx.o timeout.o fdinfo.o \
15-
cancel.o waitid.o register.o \
16-
truncate.o memmap.o alloc_cache.o
14+
statx.o timeout.o fdinfo.o cancel.o \
15+
waitid.o register.o truncate.o \
16+
memmap.o alloc_cache.o
1717
obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o
1818
obj-$(CONFIG_IO_WQ) += io-wq.o
1919
obj-$(CONFIG_FUTEX) += futex.o
20-
obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
20+
obj-$(CONFIG_EPOLL) += epoll.o
21+
obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o

io_uring/epoll.c

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
#include "io_uring.h"
1313
#include "epoll.h"
1414

15-
#if defined(CONFIG_EPOLL)
1615
struct io_epoll {
1716
struct file *file;
1817
int epfd;
@@ -21,6 +20,12 @@ struct io_epoll {
2120
struct epoll_event event;
2221
};
2322

23+
/*
 * Per-request command state for the epoll-wait opcode, obtained from the
 * request with io_kiocb_to_cmd(req, struct io_epoll_wait).
 */
struct io_epoll_wait {
24+
struct file *file; /* not written by the prep/issue code shown here; presumably owned by io_uring core - confirm */
25+
int maxevents; /* copied from sqe->len at prep time; passed to epoll_sendevents() */
26+
struct epoll_event __user *events; /* userspace event buffer, taken from sqe->addr at prep time */
27+
};
28+
2429
int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
2530
{
2631
struct io_epoll *epoll = io_kiocb_to_cmd(req, struct io_epoll);
@@ -58,4 +63,30 @@ int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
5863
io_req_set_res(req, ret, 0);
5964
return IOU_OK;
6065
}
61-
#endif
66+
67+
/*
 * Prepare an epoll-wait request from its submission queue entry.
 *
 * Rejects the SQE with -EINVAL if any of off, rw_flags, buf_index or
 * splice_fd_in is non-zero, then captures the event limit (sqe->len) and
 * the userspace event buffer (sqe->addr) into the request's command area.
 *
 * Returns 0 on success, -EINVAL on a malformed SQE.
 */
int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
68+
{
69+
struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
70+

71+
/* These SQE fields are not consumed by this opcode and must be zero. */
if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
72+
return -EINVAL;
73+

74+
/*
 * NOTE(review): maxevents is a signed int and no range check is done
 * here; presumably epoll_sendevents() validates it - confirm.
 */
iew->maxevents = READ_ONCE(sqe->len);
75+
iew->events = u64_to_user_ptr(READ_ONCE(sqe->addr));
76+
return 0;
77+
}
78+
79+
/*
 * Issue an epoll-wait request: ask epoll_sendevents() to deliver events
 * from req->file into the user buffer recorded at prep time.
 *
 * Returns -EAGAIN when epoll_sendevents() returns 0; otherwise stores the
 * return value (after marking the request failed if it is negative) as the
 * request result and returns IOU_OK. issue_flags is not used in this body.
 */
int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags)
80+
{
81+
struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
82+
int ret;
83+

84+
ret = epoll_sendevents(req->file, iew->events, iew->maxevents);
85+
/*
 * A zero return is not completed; -EAGAIN is returned instead.
 * Presumably this lets io_uring retry the request later - confirm
 * against the io_uring core and the opcode's definition.
 */
if (ret == 0)
86+
return -EAGAIN;
87+
/* A negative return is an error: flag the request as failed. */
if (ret < 0)
88+
req_set_fail(req);
89+

90+
/* Publish the epoll_sendevents() return value as the request result. */
io_req_set_res(req, ret, 0);
91+
return IOU_OK;
92+
}

io_uring/epoll.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,6 @@
33
#if defined(CONFIG_EPOLL)
44
int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
55
int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags);
6+
int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
7+
int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags);
68
#endif

io_uring/opdef.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,17 @@ const struct io_issue_def io_issue_defs[] = {
527527
.issue = io_recvzc,
528528
#else
529529
.prep = io_eopnotsupp_prep,
530+
#endif
531+
},
532+
[IORING_OP_EPOLL_WAIT] = {
533+
.needs_file = 1,
534+
.audit_skip = 1,
535+
.pollin = 1,
536+
#if defined(CONFIG_EPOLL)
537+
.prep = io_epoll_wait_prep,
538+
.issue = io_epoll_wait,
539+
#else
540+
.prep = io_eopnotsupp_prep,
530541
#endif
531542
},
532543
};
@@ -761,6 +772,9 @@ const struct io_cold_def io_cold_defs[] = {
761772
[IORING_OP_RECV_ZC] = {
762773
.name = "RECV_ZC",
763774
},
775+
[IORING_OP_EPOLL_WAIT] = {
776+
.name = "EPOLL_WAIT",
777+
},
764778
};
765779

766780
const char *io_uring_get_opcode(u8 opcode)

0 commit comments

Comments
 (0)