Skip to content

Commit 0fc8c2a

Browse files
Dylan Yudaken and axboe
authored and committed
io_uring: calculate CQEs from the user visible value
io_cqring_wait (and it's wake function io_has_work) used cached_cq_tail in order to calculate the number of CQEs. cached_cq_tail is set strictly before the user visible rings->cq.tail However as far as userspace is concerned, if io_uring_enter(2) is called with a minimum number of events, they will verify by checking rings->cq.tail. It is therefore possible for io_uring_enter(2) to return early with fewer events visible to the user. Instead make the wait functions read from the user visible value, so there will be no discrepency. This is triggered eventually by the following reproducer: struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; unsigned int cqe_ready; struct io_uring ring; int ret, i; ret = io_uring_queue_init(N, &ring, 0); assert(!ret); while(true) { for (i = 0; i < N; i++) { sqe = io_uring_get_sqe(&ring); io_uring_prep_nop(sqe); sqe->flags |= IOSQE_ASYNC; } ret = io_uring_submit(&ring); assert(ret == N); do { ret = io_uring_wait_cqes(&ring, &cqe, N, NULL, NULL); } while(ret == -EINTR); cqe_ready = io_uring_cq_ready(&ring); assert(!ret); assert(cqe_ready == N); io_uring_cq_advance(&ring, N); } Fixes: ad3eb2c ("io_uring: split overflow state into SQ and CQ side") Signed-off-by: Dylan Yudaken <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jens Axboe <[email protected]>
1 parent 6dcabcd commit 0fc8c2a

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

io_uring/io_uring.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,11 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
176176
return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
177177
}
178178

179+
static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx)
180+
{
181+
return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head);
182+
}
183+
179184
static bool io_match_linked(struct io_kiocb *head)
180185
{
181186
struct io_kiocb *req;
@@ -2315,7 +2320,7 @@ static inline bool io_has_work(struct io_ring_ctx *ctx)
23152320
static inline bool io_should_wake(struct io_wait_queue *iowq)
23162321
{
23172322
struct io_ring_ctx *ctx = iowq->ctx;
2318-
int dist = ctx->cached_cq_tail - (int) iowq->cq_tail;
2323+
int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;
23192324

23202325
/*
23212326
* Wake up if we have enough events, or if a timeout occurred since we
@@ -2399,7 +2404,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
23992404
return ret;
24002405
io_cqring_overflow_flush(ctx);
24012406

2402-
if (io_cqring_events(ctx) >= min_events)
2407+
/* if user messes with these they will just get an early return */
2408+
if (__io_cqring_events_user(ctx) >= min_events)
24032409
return 0;
24042410
} while (ret > 0);
24052411

0 commit comments

Comments
 (0)