
Commit f0b493e

io_uring: prevent potential eventfd recursion on poll
If we have nested or circular eventfd wakeups, then we can deadlock if we run them inline from our poll waitqueue wakeup handler. It's also possible to have very long chains of notifications, to the extent where we could risk blowing the stack.

Check the eventfd recursion count before calling eventfd_signal(). If it's non-zero, then punt the signaling to async context. This is always safe, as it takes us out-of-line in terms of stack and locking context.

Cc: [email protected] # 5.1+
Signed-off-by: Jens Axboe <[email protected]>
1 parent b5e683d commit f0b493e
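For background, a minimal userspace sketch (not part of this commit; it assumes liburing and omits error handling) of the circular setup the commit message describes: the eventfd registered for CQ notifications is itself polled through io_uring, so every completion signals the eventfd, and the poll completion then wants to post another CQE from inside that signal path.

/*
 * Hedged illustration only: register an eventfd for CQE notifications,
 * then poll that same eventfd via io_uring. Any CQE signals efd, which
 * fires the poll wakeup, which posts another CQE -- the nested
 * eventfd_signal() case this patch guards against.
 */
#include <poll.h>
#include <sys/eventfd.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        int efd = eventfd(0, 0);

        io_uring_queue_init(8, &ring, 0);
        io_uring_register_eventfd(&ring, efd);  /* every CQE signals efd */

        /* Poll the ring's own notification eventfd through the ring. */
        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_poll_add(sqe, efd, POLLIN);
        io_uring_submit(&ring);

        /* Kick off the chain with any request that completes, e.g. a NOP:
         * its CQE signals efd, the poll on efd completes and posts a CQE,
         * and that CQE wants to signal efd again from the wakeup handler. */
        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_nop(sqe);
        io_uring_submit(&ring);

        io_uring_queue_exit(&ring);
        return 0;
}

With the patch below, io_poll_wake() sees a non-zero eventfd_signal_count() in that nested case and defers the signal to io_poll_trigger_evfd() via async work instead of calling eventfd_signal() inline.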

1 file changed: +30 −7 lines
fs/io_uring.c

Lines changed: 30 additions & 7 deletions
@@ -1020,21 +1020,28 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 
 static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
 {
+        if (!ctx->cq_ev_fd)
+                return false;
         if (!ctx->eventfd_async)
                 return true;
         return io_wq_current_is_worker() || in_interrupt();
 }
 
-static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+static void __io_cqring_ev_posted(struct io_ring_ctx *ctx, bool trigger_ev)
 {
         if (waitqueue_active(&ctx->wait))
                 wake_up(&ctx->wait);
         if (waitqueue_active(&ctx->sqo_wait))
                 wake_up(&ctx->sqo_wait);
-        if (ctx->cq_ev_fd && io_should_trigger_evfd(ctx))
+        if (trigger_ev)
                 eventfd_signal(ctx->cq_ev_fd, 1);
 }
 
+static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+{
+        __io_cqring_ev_posted(ctx, io_should_trigger_evfd(ctx));
+}
+
 /* Returns true if there are no backlogged entries after the flush */
 static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 {
@@ -3561,6 +3568,14 @@ static void io_poll_flush(struct io_wq_work **workptr)
         __io_poll_flush(req->ctx, nodes);
 }
 
+static void io_poll_trigger_evfd(struct io_wq_work **workptr)
+{
+        struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+
+        eventfd_signal(req->ctx->cq_ev_fd, 1);
+        io_put_req(req);
+}
+
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
                         void *key)
 {
@@ -3586,14 +3601,22 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 
         if (llist_empty(&ctx->poll_llist) &&
             spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+                bool trigger_ev;
+
                 hash_del(&req->hash_node);
                 io_poll_complete(req, mask, 0);
-                req->flags |= REQ_F_COMP_LOCKED;
-                io_put_req(req);
-                spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
-                io_cqring_ev_posted(ctx);
-                req = NULL;
+                trigger_ev = io_should_trigger_evfd(ctx);
+                if (trigger_ev && eventfd_signal_count()) {
+                        trigger_ev = false;
+                        req->work.func = io_poll_trigger_evfd;
+                } else {
+                        req->flags |= REQ_F_COMP_LOCKED;
+                        io_put_req(req);
+                        req = NULL;
+                }
+                spin_unlock_irqrestore(&ctx->completion_lock, flags);
+                __io_cqring_ev_posted(ctx, trigger_ev);
         } else {
                 req->result = mask;
                 req->llist_node.next = NULL;
