Skip to content

Commit c132953

Browse files
committed
io_uring/rsrc: allow cloning with node replacements
Currently, cloning a buffer table will fail if the destination already has a table. But it should be possible to use it to replace existing elements. Add an IORING_REGISTER_DST_REPLACE cloning flag, which if set, will allow the destination to already have a buffer table. If that is the case, then entries designated by offset + nr buffers will be replaced if they already exist. Note that it's allowed to use IORING_REGISTER_DST_REPLACE and not have an existing table, in which case it'll work just like not having the flag set and an empty table - it'll just assign the newly created table for that case. Signed-off-by: Jens Axboe <[email protected]>
1 parent b16e920 commit c132953

File tree

2 files changed

+54
-15
lines changed

2 files changed

+54
-15
lines changed

include/uapi/linux/io_uring.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,8 @@ struct io_uring_clock_register {
713713
};
714714

715715
enum {
716-
IORING_REGISTER_SRC_REGISTERED = 1,
716+
IORING_REGISTER_SRC_REGISTERED = (1U << 0),
717+
IORING_REGISTER_DST_REPLACE = (1U << 1),
717718
};
718719

719720
struct io_uring_clone_buffers {

io_uring/rsrc.c

Lines changed: 52 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -930,8 +930,40 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
930930
static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
931931
struct io_uring_clone_buffers *arg)
932932
{
933-
int i, ret, nbufs, off, nr;
934933
struct io_rsrc_data data;
934+
int i, ret, off, nr;
935+
unsigned int nbufs;
936+
937+
/* if offsets are given, must have nr specified too */
938+
if (!arg->nr && (arg->dst_off || arg->src_off))
939+
return -EINVAL;
940+
/* not allowed unless REPLACE is set */
941+
if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE))
942+
return -EBUSY;
943+
944+
nbufs = READ_ONCE(src_ctx->buf_table.nr);
945+
if (!arg->nr)
946+
arg->nr = nbufs;
947+
else if (arg->nr > nbufs)
948+
return -EINVAL;
949+
else if (arg->nr > IORING_MAX_REG_BUFFERS)
950+
return -EINVAL;
951+
if (check_add_overflow(arg->nr, arg->dst_off, &nbufs))
952+
return -EOVERFLOW;
953+
954+
ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.nr));
955+
if (ret)
956+
return ret;
957+
958+
/* Fill entries in data from dst that won't overlap with src */
959+
for (i = 0; i < min(arg->dst_off, ctx->buf_table.nr); i++) {
960+
struct io_rsrc_node *src_node = ctx->buf_table.nodes[i];
961+
962+
if (src_node) {
963+
data.nodes[i] = src_node;
964+
src_node->refs++;
965+
}
966+
}
935967

936968
/*
937969
* Drop our own lock here. We'll setup the data we need and reference
@@ -954,14 +986,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
954986
goto out_unlock;
955987
if (off > nbufs)
956988
goto out_unlock;
957-
if (check_add_overflow(arg->nr, arg->dst_off, &off))
958-
goto out_unlock;
959-
ret = -EINVAL;
960-
if (off > IORING_MAX_REG_BUFFERS)
961-
goto out_unlock;
962-
ret = io_rsrc_data_alloc(&data, off);
963-
if (ret)
964-
goto out_unlock;
965989

966990
off = arg->dst_off;
967991
i = arg->src_off;
@@ -989,6 +1013,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
9891013
/* Have a ref on the bufs now, drop src lock and re-grab our own lock */
9901014
mutex_unlock(&src_ctx->uring_lock);
9911015
mutex_lock(&ctx->uring_lock);
1016+
1017+
/*
1018+
* If asked for replace, put the old table. data->nodes[] holds both
1019+
* old and new nodes at this point.
1020+
*/
1021+
if (arg->flags & IORING_REGISTER_DST_REPLACE)
1022+
io_rsrc_data_free(&ctx->buf_table);
1023+
1024+
/*
1025+
* ctx->buf_table should be empty now - either the contents are being
1026+
* replaced and we just freed the table, or someone raced setting up
1027+
* a buffer table while the clone was happening. If not empty, fall
1028+
* through to failure handling.
1029+
*/
9921030
if (!ctx->buf_table.nr) {
9931031
ctx->buf_table = data;
9941032
return 0;
@@ -998,14 +1036,14 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
9981036
mutex_lock(&src_ctx->uring_lock);
9991037
/* someone raced setting up buffers, dump ours */
10001038
ret = -EBUSY;
1001-
i = nbufs;
10021039
out_put_free:
1040+
i = data.nr;
10031041
while (i--) {
10041042
io_buffer_unmap(src_ctx, data.nodes[i]);
10051043
kfree(data.nodes[i]);
10061044
}
1007-
io_rsrc_data_free(&data);
10081045
out_unlock:
1046+
io_rsrc_data_free(&data);
10091047
mutex_unlock(&src_ctx->uring_lock);
10101048
mutex_lock(&ctx->uring_lock);
10111049
return ret;
@@ -1025,12 +1063,12 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
10251063
struct file *file;
10261064
int ret;
10271065

1028-
if (ctx->buf_table.nr)
1029-
return -EBUSY;
10301066
if (copy_from_user(&buf, arg, sizeof(buf)))
10311067
return -EFAULT;
1032-
if (buf.flags & ~IORING_REGISTER_SRC_REGISTERED)
1068+
if (buf.flags & ~(IORING_REGISTER_SRC_REGISTERED|IORING_REGISTER_DST_REPLACE))
10331069
return -EINVAL;
1070+
if (!(buf.flags & IORING_REGISTER_DST_REPLACE) && ctx->buf_table.nr)
1071+
return -EBUSY;
10341072
if (memchr_inv(buf.pad, 0, sizeof(buf.pad)))
10351073
return -EINVAL;
10361074

0 commit comments

Comments
 (0)