Skip to content

Commit 6fe4220

Browse files
isilence authored and axboe committed
io_uring/notif: implement notification stacking
The network stack allows only one ubuf_info per skb, and unlike MSG_ZEROCOPY, each io_uring zerocopy send will carry a separate ubuf_info. That means that send requests can't reuse a previously allocated skb and need to get one or more new ones. That's fine for large sends, but otherwise it would spam the stack with lots of skbs carrying just a little data each. To help with that, implement linking notifications (i.e. an io_uring wrapper around ubuf_info) into a list. Each is refcounted by skbs and the stack as usual. Additionally, all non-head entries keep a reference to the head, which they put down when their refcount hits 0. When the head has no more users, it'll efficiently put all notifications in a batch. As mentioned previously about ->io_link_skb, the callback implementation always allows binding to an skb without a ubuf_info. Reviewed-by: Jens Axboe <[email protected]> Signed-off-by: Pavel Begunkov <[email protected]> Link: https://lore.kernel.org/r/bf1e7f9b72f9ecc99999fdc0d2cded5eea87fd0b.1713369317.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <[email protected]>
1 parent 5a56946 commit 6fe4220

File tree

2 files changed

+67
-7
lines changed

2 files changed

+67
-7
lines changed

io_uring/notif.c

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,28 @@
99
#include "notif.h"
1010
#include "rsrc.h"
1111

12+
static const struct ubuf_info_ops io_ubuf_ops;
13+
1214
static void io_notif_tw_complete(struct io_kiocb *notif, struct io_tw_state *ts)
1315
{
1416
struct io_notif_data *nd = io_notif_to_data(notif);
1517

16-
if (unlikely(nd->zc_report) && (nd->zc_copied || !nd->zc_used))
17-
notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
18+
do {
19+
notif = cmd_to_io_kiocb(nd);
1820

19-
if (nd->account_pages && notif->ctx->user) {
20-
__io_unaccount_mem(notif->ctx->user, nd->account_pages);
21-
nd->account_pages = 0;
22-
}
23-
io_req_task_complete(notif, ts);
21+
lockdep_assert(refcount_read(&nd->uarg.refcnt) == 0);
22+
23+
if (unlikely(nd->zc_report) && (nd->zc_copied || !nd->zc_used))
24+
notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
25+
26+
if (nd->account_pages && notif->ctx->user) {
27+
__io_unaccount_mem(notif->ctx->user, nd->account_pages);
28+
nd->account_pages = 0;
29+
}
30+
31+
nd = nd->next;
32+
io_req_task_complete(notif, ts);
33+
} while (nd);
2434
}
2535

2636
void io_tx_ubuf_complete(struct sk_buff *skb, struct ubuf_info *uarg,
@@ -39,12 +49,56 @@ void io_tx_ubuf_complete(struct sk_buff *skb, struct ubuf_info *uarg,
3949
if (!refcount_dec_and_test(&uarg->refcnt))
4050
return;
4151

52+
if (nd->head != nd) {
53+
io_tx_ubuf_complete(skb, &nd->head->uarg, success);
54+
return;
55+
}
4256
notif->io_task_work.func = io_notif_tw_complete;
4357
__io_req_task_work_add(notif, IOU_F_TWQ_LAZY_WAKE);
4458
}
4559

60+
static int io_link_skb(struct sk_buff *skb, struct ubuf_info *uarg)
61+
{
62+
struct io_notif_data *nd, *prev_nd;
63+
struct io_kiocb *prev_notif, *notif;
64+
struct ubuf_info *prev_uarg = skb_zcopy(skb);
65+
66+
nd = container_of(uarg, struct io_notif_data, uarg);
67+
notif = cmd_to_io_kiocb(nd);
68+
69+
if (!prev_uarg) {
70+
net_zcopy_get(&nd->uarg);
71+
skb_zcopy_init(skb, &nd->uarg);
72+
return 0;
73+
}
74+
/* handle it separately as we can't link a notif to itself */
75+
if (unlikely(prev_uarg == &nd->uarg))
76+
return 0;
77+
/* we can't join two links together, just request a fresh skb */
78+
if (unlikely(nd->head != nd || nd->next))
79+
return -EEXIST;
80+
/* don't mix zc providers */
81+
if (unlikely(prev_uarg->ops != &io_ubuf_ops))
82+
return -EEXIST;
83+
84+
prev_nd = container_of(prev_uarg, struct io_notif_data, uarg);
85+
prev_notif = cmd_to_io_kiocb(nd);
86+
87+
/* make sure all noifications can be finished in the same task_work */
88+
if (unlikely(notif->ctx != prev_notif->ctx ||
89+
notif->task != prev_notif->task))
90+
return -EEXIST;
91+
92+
nd->head = prev_nd->head;
93+
nd->next = prev_nd->next;
94+
prev_nd->next = nd;
95+
net_zcopy_get(&nd->head->uarg);
96+
return 0;
97+
}
98+
4699
static const struct ubuf_info_ops io_ubuf_ops = {
47100
.complete = io_tx_ubuf_complete,
101+
.link_skb = io_link_skb,
48102
};
49103

50104
struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
@@ -65,6 +119,9 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
65119
nd = io_notif_to_data(notif);
66120
nd->zc_report = false;
67121
nd->account_pages = 0;
122+
nd->next = NULL;
123+
nd->head = nd;
124+
68125
nd->uarg.flags = IO_NOTIF_UBUF_FLAGS;
69126
nd->uarg.ops = &io_ubuf_ops;
70127
refcount_set(&nd->uarg.refcnt, 1);

io_uring/notif.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ struct io_notif_data {
1414
struct file *file;
1515
struct ubuf_info uarg;
1616

17+
struct io_notif_data *next;
18+
struct io_notif_data *head;
19+
1720
unsigned account_pages;
1821
bool zc_report;
1922
bool zc_used;

0 commit comments

Comments
 (0)