Commit 35c8711

io_uring/kbuf: add helpers for getting/peeking multiple buffers
Our provided buffer interface only allows selection of a single buffer. Add an API that allows getting/peeking multiple buffers at the same time.

This is only implemented for the ring provided buffers. It could be added for the legacy provided buffers as well, but since use of the new interface is strongly encouraged, let's keep it simpler and just provide it for the new API. The legacy interface will always select just a single buffer.

There are two new main functions:

io_buffers_select(), which selects as many buffers as it can. The caller supplies the iovec array, and io_buffers_select() may allocate a bigger array if the 'out_len' being passed in is non-zero and bigger than what fits in the provided iovec. Buffers grabbed with this helper are permanently assigned.

io_buffers_peek(), which works like io_buffers_select(), except the buffers can be recycled, if needed. Callers using either of these functions should call io_put_kbufs() rather than io_put_kbuf() at completion time. The peek interface must be called with the ctx locked from peek to completion.

This adds a new bit of state for the request:

- REQ_F_BUFFERS_COMMIT, which means that the buffers have been peeked and should be committed to the buffer ring head when they are put as part of completion. Prior to this, req->buf_list was cleared to NULL when committed.

Signed-off-by: Jens Axboe <[email protected]>
1 parent ac5f71a commit 35c8711
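
For orientation, here is a hedged sketch of how an opcode handler might drive the new select interface; the handler name, buffer count, and size cap are made up for illustration, while io_buffers_select(), struct buf_sel_arg, io_put_kbufs(), io_req_set_res() and IOU_OK are existing io_uring internals. It assumes req->buf_index already holds the buffer group ID from the SQE, as the helpers in this patch expect.

/*
 * Hypothetical issue-side sketch, NOT part of this commit: grab up to
 * eight ring provided buffers in one go. For ring-provided groups,
 * io_buffers_select() commits the buffers (advances bl->head) at
 * selection time and marks the request REQ_F_BL_NO_RECYCLE;
 * io_put_kbufs() at completion still encodes the starting buffer id
 * into the CQE flags.
 */
static int io_example_multishot_issue(struct io_kiocb *req,
				      unsigned int issue_flags)
{
	struct iovec iovs[8];
	struct buf_sel_arg arg = {
		.iovs		= iovs,
		.nr_iovs	= ARRAY_SIZE(iovs),
		.max_len	= 64 * 1024,	/* cap total bytes selected */
	};
	unsigned int cflags;
	int nr;

	nr = io_buffers_select(req, &arg, issue_flags);
	if (unlikely(nr < 0))
		return nr;

	/* ... transfer arg.out_len bytes described by arg.iovs[0..nr-1] ... */

	/* put all selected buffers in one go at completion time */
	cflags = io_put_kbufs(req, nr, issue_flags);
	io_req_set_res(req, arg.out_len, cflags);
	return IOU_OK;
}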

File tree

3 files changed: +201 −12 lines changed

  include/linux/io_uring_types.h
  io_uring/kbuf.c
  io_uring/kbuf.h

include/linux/io_uring_types.h

Lines changed: 3 additions & 0 deletions
@@ -472,6 +472,7 @@ enum {
 	REQ_F_CAN_POLL_BIT,
 	REQ_F_BL_EMPTY_BIT,
 	REQ_F_BL_NO_RECYCLE_BIT,
+	REQ_F_BUFFERS_COMMIT_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
@@ -550,6 +551,8 @@ enum {
 	REQ_F_BL_EMPTY		= IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT),
 	/* don't recycle provided buffers for this request */
 	REQ_F_BL_NO_RECYCLE	= IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT),
+	/* buffer ring head needs incrementing on put */
+	REQ_F_BUFFERS_COMMIT	= IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT),
 };
 
 typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);

io_uring/kbuf.c

Lines changed: 154 additions & 3 deletions
@@ -117,6 +117,27 @@ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
 	return NULL;
 }
 
+static int io_provided_buffers_select(struct io_kiocb *req, size_t *len,
+				      struct io_buffer_list *bl,
+				      struct iovec *iov)
+{
+	void __user *buf;
+
+	buf = io_provided_buffer_select(req, len, bl);
+	if (unlikely(!buf))
+		return -ENOBUFS;
+
+	iov[0].iov_base = buf;
+	iov[0].iov_len = *len;
+	return 0;
+}
+
+static struct io_uring_buf *io_ring_head_to_buf(struct io_uring_buf_ring *br,
+						__u16 head, __u16 mask)
+{
+	return &br->bufs[head & mask];
+}
+
 static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 					  struct io_buffer_list *bl,
 					  unsigned int issue_flags)
@@ -132,11 +153,10 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 	if (head + 1 == tail)
 		req->flags |= REQ_F_BL_EMPTY;
 
-	head &= bl->mask;
-	buf = &br->bufs[head];
+	buf = io_ring_head_to_buf(br, head, bl->mask);
 	if (*len == 0 || *len > buf->len)
 		*len = buf->len;
-	req->flags |= REQ_F_BUFFER_RING;
+	req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
 	req->buf_list = bl;
 	req->buf_index = buf->bid;
 
@@ -151,6 +171,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 		 * the transfer completes (or if we get -EAGAIN and must poll of
 		 * retry).
 		 */
+		req->flags &= ~REQ_F_BUFFERS_COMMIT;
 		req->buf_list = NULL;
 		bl->head++;
 	}
@@ -177,6 +198,136 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
 	return ret;
 }
 
+/* cap it at a reasonable 256, will be one page even for 4K */
+#define PEEK_MAX_IMPORT		256
+
+static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
+				struct io_buffer_list *bl)
+{
+	struct io_uring_buf_ring *br = bl->buf_ring;
+	struct iovec *iov = arg->iovs;
+	int nr_iovs = arg->nr_iovs;
+	__u16 nr_avail, tail, head;
+	struct io_uring_buf *buf;
+
+	tail = smp_load_acquire(&br->tail);
+	head = bl->head;
+	nr_avail = min_t(__u16, tail - head, UIO_MAXIOV);
+	if (unlikely(!nr_avail))
+		return -ENOBUFS;
+
+	buf = io_ring_head_to_buf(br, head, bl->mask);
+	if (arg->max_len) {
+		int needed;
+
+		needed = (arg->max_len + buf->len - 1) / buf->len;
+		needed = min(needed, PEEK_MAX_IMPORT);
+		if (nr_avail > needed)
+			nr_avail = needed;
+	}
+
+	/*
+	 * only alloc a bigger array if we know we have data to map, eg not
+	 * a speculative peek operation.
+	 */
+	if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs && arg->max_len) {
+		iov = kmalloc_array(nr_avail, sizeof(struct iovec), GFP_KERNEL);
+		if (unlikely(!iov))
+			return -ENOMEM;
+		if (arg->mode & KBUF_MODE_FREE)
+			kfree(arg->iovs);
+		arg->iovs = iov;
+		nr_iovs = nr_avail;
+	} else if (nr_avail < nr_iovs) {
+		nr_iovs = nr_avail;
+	}
+
+	/* set it to max, if not set, so we can use it unconditionally */
+	if (!arg->max_len)
+		arg->max_len = INT_MAX;
+
+	req->buf_index = buf->bid;
+	do {
+		/* truncate end piece, if needed */
+		if (buf->len > arg->max_len)
+			buf->len = arg->max_len;
+
+		iov->iov_base = u64_to_user_ptr(buf->addr);
+		iov->iov_len = buf->len;
+		iov++;
+
+		arg->out_len += buf->len;
+		arg->max_len -= buf->len;
+		if (!arg->max_len)
+			break;
+
+		buf = io_ring_head_to_buf(br, ++head, bl->mask);
+	} while (--nr_iovs);
+
+	if (head == tail)
+		req->flags |= REQ_F_BL_EMPTY;
+
+	req->flags |= REQ_F_BUFFER_RING;
+	req->buf_list = bl;
+	return iov - arg->iovs;
+}
+
+int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
+		      unsigned int issue_flags)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_buffer_list *bl;
+	int ret = -ENOENT;
+
+	io_ring_submit_lock(ctx, issue_flags);
+	bl = io_buffer_get_list(ctx, req->buf_index);
+	if (unlikely(!bl))
+		goto out_unlock;
+
+	if (bl->is_buf_ring) {
+		ret = io_ring_buffers_peek(req, arg, bl);
+		/*
+		 * Don't recycle these buffers if we need to go through poll.
+		 * Nobody else can use them anyway, and holding on to provided
+		 * buffers for a send/write operation would happen on the app
+		 * side anyway with normal buffers. Besides, we already
+		 * committed them, they cannot be put back in the queue.
+		 */
+		if (ret > 0) {
+			req->flags |= REQ_F_BL_NO_RECYCLE;
+			req->buf_list->head += ret;
+		}
+	} else {
+		ret = io_provided_buffers_select(req, &arg->out_len, bl, arg->iovs);
+	}
+out_unlock:
+	io_ring_submit_unlock(ctx, issue_flags);
+	return ret;
+}
+
+int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_buffer_list *bl;
+	int ret;
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	bl = io_buffer_get_list(ctx, req->buf_index);
+	if (unlikely(!bl))
+		return -ENOENT;
+
+	if (bl->is_buf_ring) {
+		ret = io_ring_buffers_peek(req, arg, bl);
+		if (ret > 0)
+			req->flags |= REQ_F_BUFFERS_COMMIT;
+		return ret;
+	}
+
+	/* don't support multiple buffer selections for legacy */
+	return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
+}
+
 static int __io_remove_buffers(struct io_ring_ctx *ctx,
 				struct io_buffer_list *bl, unsigned nbufs)
 {
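
The array-expansion path in io_ring_buffers_peek() is easiest to see from the caller's side. Below is a hedged sketch (not part of this commit) of a caller using io_buffers_peek() with KBUF_MODE_EXPAND; the function name and sizes are made up. KBUF_MODE_FREE is deliberately left out because the helper kfree()s the old array when it expands, which is only safe when that initial array was itself kmalloc'ed, not on the stack as here.

/*
 * Hypothetical sketch, NOT part of this commit. Assumes io_uring/kbuf.h
 * context; io_buffers_peek() must be called with ctx->uring_lock held,
 * per the lockdep assert above.
 */
static int io_example_peek(struct io_kiocb *req)
{
	struct iovec stack_iovs[4];
	struct buf_sel_arg arg = {
		.iovs		= stack_iovs,
		.nr_iovs	= ARRAY_SIZE(stack_iovs),
		.max_len	= 256 * 1024,
		/* allow a bigger, kmalloc'ed array if four iovecs aren't enough */
		.mode		= KBUF_MODE_EXPAND,
	};
	int nr;

	nr = io_buffers_peek(req, &arg);
	if (unlikely(nr < 0))
		return nr;

	/*
	 * arg.iovs[0..nr-1] describe the peeked buffers. They are not yet
	 * consumed: REQ_F_BUFFERS_COMMIT means the ring head only advances
	 * by 'nr' when io_put_kbufs(req, nr, ...) runs at completion, and
	 * io_kbuf_recycle() can still hand them back if the request has to
	 * wait instead.
	 */

	/* ... issue the transfer over arg.iovs[0..nr-1] here ... */

	/* the caller owns the expanded array, if one was allocated */
	if (arg.iovs != stack_iovs)
		kfree(arg.iovs);
	return nr;
}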

io_uring/kbuf.h

Lines changed: 44 additions & 9 deletions
@@ -41,8 +41,26 @@ struct io_buffer {
 	__u16 bgid;
 };
 
+enum {
+	/* can alloc a bigger vec */
+	KBUF_MODE_EXPAND	= 1,
+	/* if bigger vec allocated, free old one */
+	KBUF_MODE_FREE		= 2,
+};
+
+struct buf_sel_arg {
+	struct iovec *iovs;
+	size_t out_len;
+	size_t max_len;
+	int nr_iovs;
+	int mode;
+};
+
 void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
 			      unsigned int issue_flags);
+int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
+		      unsigned int issue_flags);
+int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg);
 void io_destroy_buffers(struct io_ring_ctx *ctx);
 
 int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
@@ -75,7 +93,7 @@ static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
 	 */
 	if (req->buf_list) {
 		req->buf_index = req->buf_list->bgid;
-		req->flags &= ~REQ_F_BUFFER_RING;
+		req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT);
 		return true;
 	}
 	return false;
@@ -99,11 +117,16 @@ static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
 	return false;
 }
 
-static inline void __io_put_kbuf_ring(struct io_kiocb *req)
+static inline void __io_put_kbuf_ring(struct io_kiocb *req, int nr)
 {
-	if (req->buf_list) {
-		req->buf_index = req->buf_list->bgid;
-		req->buf_list->head++;
+	struct io_buffer_list *bl = req->buf_list;
+
+	if (bl) {
+		if (req->flags & REQ_F_BUFFERS_COMMIT) {
+			bl->head += nr;
+			req->flags &= ~REQ_F_BUFFERS_COMMIT;
+		}
+		req->buf_index = bl->bgid;
 	}
 	req->flags &= ~REQ_F_BUFFER_RING;
 }
@@ -112,7 +135,7 @@ static inline void __io_put_kbuf_list(struct io_kiocb *req,
 				      struct list_head *list)
 {
 	if (req->flags & REQ_F_BUFFER_RING) {
-		__io_put_kbuf_ring(req);
+		__io_put_kbuf_ring(req, 1);
 	} else {
 		req->buf_index = req->kbuf->bgid;
 		list_add(&req->kbuf->list, list);
@@ -130,8 +153,8 @@ static inline void io_kbuf_drop(struct io_kiocb *req)
 	__io_put_kbuf_list(req, &req->ctx->io_buffers_comp);
 }
 
-static inline unsigned int io_put_kbuf(struct io_kiocb *req,
-				       unsigned issue_flags)
+static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int nbufs,
+					  unsigned issue_flags)
 {
 	unsigned int ret;
 
@@ -140,9 +163,21 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
 
 	ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
 	if (req->flags & REQ_F_BUFFER_RING)
-		__io_put_kbuf_ring(req);
+		__io_put_kbuf_ring(req, nbufs);
 	else
 		__io_put_kbuf(req, issue_flags);
 	return ret;
 }
+
+static inline unsigned int io_put_kbuf(struct io_kiocb *req,
+				       unsigned issue_flags)
+{
+	return __io_put_kbufs(req, 1, issue_flags);
+}
+
+static inline unsigned int io_put_kbufs(struct io_kiocb *req, int nbufs,
+					unsigned issue_flags)
+{
+	return __io_put_kbufs(req, nbufs, issue_flags);
+}
 #endif
