Skip to content

Commit cec53f4

Browse files
committed
Merge tag 'io_uring-6.0-2022-09-02' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe: - A single fix for over-eager retries for networking (Pavel) - Revert the notification slot support for zerocopy sends. It turns out that even after more than a year of development and testing, there's not full agreement on whether just using plain ordered notifications is Good Enough to avoid the complexity of using the notifications slots. Because of that, we decided that it's best left to a future final decision. We can always bring back this feature, but we can't really change it or remove it once we've released 6.0 with it enabled. The reverts leave the usual CQE notifications as the primary interface for knowing when data was sent, and when it was acked. (Pavel) * tag 'io_uring-6.0-2022-09-02' of git://git.kernel.dk/linux-block: selftests/net: return back io_uring zc send tests io_uring/net: simplify zerocopy send user API io_uring/notif: remove notif registration Revert "io_uring: rename IORING_OP_FILES_UPDATE" Revert "io_uring: add zc notification flush requests" selftests/net: temporarily disable io_uring zc test io_uring/net: fix overexcessive retries
2 parents 1551f8f + 916d72c commit cec53f4

File tree

11 files changed

+99
-322
lines changed

11 files changed

+99
-322
lines changed

include/uapi/linux/io_uring.h

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ struct io_uring_sqe {
7171
__s32 splice_fd_in;
7272
__u32 file_index;
7373
struct {
74-
__u16 notification_idx;
7574
__u16 addr_len;
75+
__u16 __pad3[1];
7676
};
7777
};
7878
union {
@@ -178,8 +178,7 @@ enum io_uring_op {
178178
IORING_OP_FALLOCATE,
179179
IORING_OP_OPENAT,
180180
IORING_OP_CLOSE,
181-
IORING_OP_RSRC_UPDATE,
182-
IORING_OP_FILES_UPDATE = IORING_OP_RSRC_UPDATE,
181+
IORING_OP_FILES_UPDATE,
183182
IORING_OP_STATX,
184183
IORING_OP_READ,
185184
IORING_OP_WRITE,
@@ -206,7 +205,7 @@ enum io_uring_op {
206205
IORING_OP_GETXATTR,
207206
IORING_OP_SOCKET,
208207
IORING_OP_URING_CMD,
209-
IORING_OP_SENDZC_NOTIF,
208+
IORING_OP_SEND_ZC,
210209

211210
/* this goes last, obviously */
212211
IORING_OP_LAST,
@@ -228,7 +227,6 @@ enum io_uring_op {
228227
#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
229228
#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
230229
#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
231-
232230
/*
233231
* sqe->splice_flags
234232
* extends splice(2) flags
@@ -281,29 +279,16 @@ enum io_uring_op {
281279
*
282280
* IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in
283281
* the buf_index field.
284-
*
285-
* IORING_RECVSEND_NOTIF_FLUSH Flush a notification after a successful
286-
* successful. Only for zerocopy sends.
287282
*/
288283
#define IORING_RECVSEND_POLL_FIRST (1U << 0)
289284
#define IORING_RECV_MULTISHOT (1U << 1)
290285
#define IORING_RECVSEND_FIXED_BUF (1U << 2)
291-
#define IORING_RECVSEND_NOTIF_FLUSH (1U << 3)
292286

293287
/*
294288
* accept flags stored in sqe->ioprio
295289
*/
296290
#define IORING_ACCEPT_MULTISHOT (1U << 0)
297291

298-
299-
/*
300-
* IORING_OP_RSRC_UPDATE flags
301-
*/
302-
enum {
303-
IORING_RSRC_UPDATE_FILES,
304-
IORING_RSRC_UPDATE_NOTIF,
305-
};
306-
307292
/*
308293
* IORING_OP_MSG_RING command types, stored in sqe->addr
309294
*/
@@ -341,10 +326,13 @@ struct io_uring_cqe {
341326
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
342327
* IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
343328
* IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv
329+
* IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct
330+
* them from sends.
344331
*/
345332
#define IORING_CQE_F_BUFFER (1U << 0)
346333
#define IORING_CQE_F_MORE (1U << 1)
347334
#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
335+
#define IORING_CQE_F_NOTIF (1U << 3)
348336

349337
enum {
350338
IORING_CQE_BUFFER_SHIFT = 16,
@@ -485,10 +473,6 @@ enum {
485473
/* register a range of fixed file slots for automatic slot allocation */
486474
IORING_REGISTER_FILE_ALLOC_RANGE = 25,
487475

488-
/* zerocopy notification API */
489-
IORING_REGISTER_NOTIFIERS = 26,
490-
IORING_UNREGISTER_NOTIFIERS = 27,
491-
492476
/* this goes last */
493477
IORING_REGISTER_LAST
494478
};

io_uring/io_uring.c

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2640,7 +2640,6 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
26402640
io_unregister_personality(ctx, index);
26412641
if (ctx->rings)
26422642
io_poll_remove_all(ctx, NULL, true);
2643-
io_notif_unregister(ctx);
26442643
mutex_unlock(&ctx->uring_lock);
26452644

26462645
/* failed during ring init, it couldn't have issued any requests */
@@ -3839,15 +3838,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
38393838
break;
38403839
ret = io_register_file_alloc_range(ctx, arg);
38413840
break;
3842-
case IORING_REGISTER_NOTIFIERS:
3843-
ret = io_notif_register(ctx, arg, nr_args);
3844-
break;
3845-
case IORING_UNREGISTER_NOTIFIERS:
3846-
ret = -EINVAL;
3847-
if (arg || nr_args)
3848-
break;
3849-
ret = io_notif_unregister(ctx);
3850-
break;
38513841
default:
38523842
ret = -EINVAL;
38533843
break;
@@ -3933,8 +3923,8 @@ static int __init io_uring_init(void)
39333923
BUILD_BUG_SQE_ELEM(42, __u16, personality);
39343924
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
39353925
BUILD_BUG_SQE_ELEM(44, __u32, file_index);
3936-
BUILD_BUG_SQE_ELEM(44, __u16, notification_idx);
3937-
BUILD_BUG_SQE_ELEM(46, __u16, addr_len);
3926+
BUILD_BUG_SQE_ELEM(44, __u16, addr_len);
3927+
BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]);
39383928
BUILD_BUG_SQE_ELEM(48, __u64, addr3);
39393929
BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd);
39403930
BUILD_BUG_SQE_ELEM(56, __u64, __pad2);

io_uring/net.c

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ struct io_sendzc {
6565
struct file *file;
6666
void __user *buf;
6767
size_t len;
68-
u16 slot_idx;
6968
unsigned msg_flags;
7069
unsigned flags;
7170
unsigned addr_len;
7271
void __user *addr;
7372
size_t done_io;
73+
struct io_kiocb *notif;
7474
};
7575

7676
#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
@@ -879,17 +879,31 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
879879
return ret;
880880
}
881881

882+
void io_sendzc_cleanup(struct io_kiocb *req)
883+
{
884+
struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
885+
886+
zc->notif->flags |= REQ_F_CQE_SKIP;
887+
io_notif_flush(zc->notif);
888+
zc->notif = NULL;
889+
}
890+
882891
int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
883892
{
884893
struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
885894
struct io_ring_ctx *ctx = req->ctx;
895+
struct io_kiocb *notif;
886896

887-
if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))
897+
if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) ||
898+
READ_ONCE(sqe->__pad3[0]))
899+
return -EINVAL;
900+
/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
901+
if (req->flags & REQ_F_CQE_SKIP)
888902
return -EINVAL;
889903

890904
zc->flags = READ_ONCE(sqe->ioprio);
891905
if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
892-
IORING_RECVSEND_FIXED_BUF | IORING_RECVSEND_NOTIF_FLUSH))
906+
IORING_RECVSEND_FIXED_BUF))
893907
return -EINVAL;
894908
if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
895909
unsigned idx = READ_ONCE(sqe->buf_index);
@@ -900,11 +914,17 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
900914
req->imu = READ_ONCE(ctx->user_bufs[idx]);
901915
io_req_set_rsrc_node(req, ctx, 0);
902916
}
917+
notif = zc->notif = io_alloc_notif(ctx);
918+
if (!notif)
919+
return -ENOMEM;
920+
notif->cqe.user_data = req->cqe.user_data;
921+
notif->cqe.res = 0;
922+
notif->cqe.flags = IORING_CQE_F_NOTIF;
923+
req->flags |= REQ_F_NEED_CLEANUP;
903924

904925
zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
905926
zc->len = READ_ONCE(sqe->len);
906927
zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
907-
zc->slot_idx = READ_ONCE(sqe->notification_idx);
908928
if (zc->msg_flags & MSG_DONTWAIT)
909929
req->flags |= REQ_F_NOWAIT;
910930

@@ -956,7 +976,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
956976
shinfo->nr_frags = frag;
957977
from->bvec += bi.bi_idx;
958978
from->nr_segs -= bi.bi_idx;
959-
from->count = bi.bi_size;
979+
from->count -= copied;
960980
from->iov_offset = bi.bi_bvec_done;
961981

962982
skb->data_len += copied;
@@ -976,33 +996,20 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
976996
int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
977997
{
978998
struct sockaddr_storage __address, *addr = NULL;
979-
struct io_ring_ctx *ctx = req->ctx;
980999
struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
981-
struct io_notif_slot *notif_slot;
982-
struct io_kiocb *notif;
9831000
struct msghdr msg;
9841001
struct iovec iov;
9851002
struct socket *sock;
986-
unsigned msg_flags;
1003+
unsigned msg_flags, cflags;
9871004
int ret, min_ret = 0;
9881005

9891006
if (!(req->flags & REQ_F_POLLED) &&
9901007
(zc->flags & IORING_RECVSEND_POLL_FIRST))
9911008
return -EAGAIN;
992-
993-
if (issue_flags & IO_URING_F_UNLOCKED)
994-
return -EAGAIN;
9951009
sock = sock_from_file(req->file);
9961010
if (unlikely(!sock))
9971011
return -ENOTSOCK;
9981012

999-
notif_slot = io_get_notif_slot(ctx, zc->slot_idx);
1000-
if (!notif_slot)
1001-
return -EINVAL;
1002-
notif = io_get_notif(ctx, notif_slot);
1003-
if (!notif)
1004-
return -ENOMEM;
1005-
10061013
msg.msg_name = NULL;
10071014
msg.msg_control = NULL;
10081015
msg.msg_controllen = 0;
@@ -1033,7 +1040,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
10331040
&msg.msg_iter);
10341041
if (unlikely(ret))
10351042
return ret;
1036-
ret = io_notif_account_mem(notif, zc->len);
1043+
ret = io_notif_account_mem(zc->notif, zc->len);
10371044
if (unlikely(ret))
10381045
return ret;
10391046
}
@@ -1045,7 +1052,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
10451052
min_ret = iov_iter_count(&msg.msg_iter);
10461053

10471054
msg.msg_flags = msg_flags;
1048-
msg.msg_ubuf = &io_notif_to_data(notif)->uarg;
1055+
msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
10491056
msg.sg_from_iter = io_sg_from_iter;
10501057
ret = sock_sendmsg(sock, &msg);
10511058

@@ -1060,18 +1067,22 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
10601067
req->flags |= REQ_F_PARTIAL_IO;
10611068
return io_setup_async_addr(req, addr, issue_flags);
10621069
}
1070+
if (ret < 0 && !zc->done_io)
1071+
zc->notif->flags |= REQ_F_CQE_SKIP;
10631072
if (ret == -ERESTARTSYS)
10641073
ret = -EINTR;
10651074
req_set_fail(req);
1066-
} else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) {
1067-
io_notif_slot_flush_submit(notif_slot, 0);
10681075
}
10691076

10701077
if (ret >= 0)
10711078
ret += zc->done_io;
10721079
else if (zc->done_io)
10731080
ret = zc->done_io;
1074-
io_req_set_res(req, ret, 0);
1081+
1082+
io_notif_flush(zc->notif);
1083+
req->flags &= ~REQ_F_NEED_CLEANUP;
1084+
cflags = ret >= 0 ? IORING_CQE_F_MORE : 0;
1085+
io_req_set_res(req, ret, cflags);
10751086
return IOU_OK;
10761087
}
10771088

io_uring/net.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags);
5555

5656
int io_sendzc(struct io_kiocb *req, unsigned int issue_flags);
5757
int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
58+
void io_sendzc_cleanup(struct io_kiocb *req);
5859

5960
void io_netmsg_cache_free(struct io_cache_entry *entry);
6061
#else

0 commit comments

Comments
 (0)