Skip to content

Commit 6e76ac5

Browse files
joshtriplett authored and axboe committed
io_uring: Add io_uring_setup flag to pre-register ring fd and never install it
With IORING_REGISTER_USE_REGISTERED_RING, an application can register the ring fd and use it via registered index rather than installed fd. This allows using a registered ring for everything *except* the initial mmap.

With IORING_SETUP_NO_MMAP, io_uring_setup uses buffers allocated by the user, rather than requiring a subsequent mmap.

The combination of the two allows a user to operate *entirely* via a registered ring fd, making it unnecessary to ever install the fd in the first place. So, add a flag IORING_SETUP_REGISTERED_FD_ONLY to make io_uring_setup register the fd and return a registered index, without installing the fd.

This allows an application to avoid touching the fd table at all, and allows a library to never even momentarily install a file descriptor.

This splits out an io_ring_add_registered_file helper from io_ring_add_registered_fd, for use by io_uring_setup.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Link: https://lore.kernel.org/r/bc8f431bada371c183b95a83399628b605e978a3.1682699803.git.josh@joshtriplett.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 03d89a2 commit 6e76ac5

File tree

4 files changed

+52
-26
lines changed

4 files changed

+52
-26
lines changed

include/uapi/linux/io_uring.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,13 @@ enum {
178178
*/
179179
#define IORING_SETUP_NO_MMAP (1U << 14)
180180

181+
/*
182+
* Register the ring fd in itself for use with
183+
* IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather
184+
* than an fd.
185+
*/
186+
#define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15)
187+
181188
enum io_uring_op {
182189
IORING_OP_NOP,
183190
IORING_OP_READV,

io_uring/io_uring.c

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -3788,19 +3788,13 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
37883788
return 0;
37893789
}
37903790

3791-
static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
3791+
static int io_uring_install_fd(struct file *file)
37923792
{
3793-
int ret, fd;
3793+
int fd;
37943794

37953795
fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
37963796
if (fd < 0)
37973797
return fd;
3798-
3799-
ret = __io_uring_add_tctx_node(ctx);
3800-
if (ret) {
3801-
put_unused_fd(fd);
3802-
return ret;
3803-
}
38043798
fd_install(fd, file);
38053799
return fd;
38063800
}
@@ -3840,6 +3834,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
38403834
struct io_uring_params __user *params)
38413835
{
38423836
struct io_ring_ctx *ctx;
3837+
struct io_uring_task *tctx;
38433838
struct file *file;
38443839
int ret;
38453840

@@ -3851,6 +3846,10 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
38513846
entries = IORING_MAX_ENTRIES;
38523847
}
38533848

3849+
if ((p->flags & IORING_SETUP_REGISTERED_FD_ONLY)
3850+
&& !(p->flags & IORING_SETUP_NO_MMAP))
3851+
return -EINVAL;
3852+
38543853
/*
38553854
* Use twice as many entries for the CQ ring. It's possible for the
38563855
* application to drive a higher depth than the size of the SQ ring,
@@ -4007,22 +4006,30 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
40074006
goto err;
40084007
}
40094008

4009+
ret = __io_uring_add_tctx_node(ctx);
4010+
if (ret)
4011+
goto err_fput;
4012+
tctx = current->io_uring;
4013+
40104014
/*
40114015
* Install ring fd as the very last thing, so we don't risk someone
40124016
* having closed it before we finish setup
40134017
*/
4014-
ret = io_uring_install_fd(ctx, file);
4015-
if (ret < 0) {
4016-
/* fput will clean it up */
4017-
fput(file);
4018-
return ret;
4019-
}
4018+
if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY)
4019+
ret = io_ring_add_registered_file(tctx, file, 0, IO_RINGFD_REG_MAX);
4020+
else
4021+
ret = io_uring_install_fd(file);
4022+
if (ret < 0)
4023+
goto err_fput;
40204024

40214025
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
40224026
return ret;
40234027
err:
40244028
io_ring_ctx_wait_and_kill(ctx);
40254029
return ret;
4030+
err_fput:
4031+
fput(file);
4032+
return ret;
40264033
}
40274034

40284035
/*
@@ -4049,7 +4056,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
40494056
IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
40504057
IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
40514058
IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
4052-
IORING_SETUP_NO_MMAP))
4059+
IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY))
40534060
return -EINVAL;
40544061

40554062
return io_uring_create(entries, &p, params);

io_uring/io_uring.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,9 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
7575
int io_uring_alloc_task_context(struct task_struct *task,
7676
struct io_ring_ctx *ctx);
7777

78+
int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
79+
int start, int end);
80+
7881
int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts);
7982
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
8083
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);

io_uring/tctx.c

Lines changed: 20 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -208,31 +208,40 @@ void io_uring_unreg_ringfd(void)
208208
}
209209
}
210210

211-
static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd,
211+
int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
212212
int start, int end)
213213
{
214-
struct file *file;
215214
int offset;
216-
217215
for (offset = start; offset < end; offset++) {
218216
offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
219217
if (tctx->registered_rings[offset])
220218
continue;
221219

222-
file = fget(fd);
223-
if (!file) {
224-
return -EBADF;
225-
} else if (!io_is_uring_fops(file)) {
226-
fput(file);
227-
return -EOPNOTSUPP;
228-
}
229220
tctx->registered_rings[offset] = file;
230221
return offset;
231222
}
232-
233223
return -EBUSY;
234224
}
235225

226+
static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd,
227+
int start, int end)
228+
{
229+
struct file *file;
230+
int offset;
231+
232+
file = fget(fd);
233+
if (!file) {
234+
return -EBADF;
235+
} else if (!io_is_uring_fops(file)) {
236+
fput(file);
237+
return -EOPNOTSUPP;
238+
}
239+
offset = io_ring_add_registered_file(tctx, file, start, end);
240+
if (offset < 0)
241+
fput(file);
242+
return offset;
243+
}
244+
236245
/*
237246
* Register a ring fd to avoid fdget/fdput for each io_uring_enter()
238247
* invocation. User passes in an array of struct io_uring_rsrc_update

0 commit comments

Comments
 (0)