Commit ce593a6

io_uring: use signal based task_work running
Since 5.7, we've been using task_work to trigger async running of requests in the context of the original task. This generally works great, but there's a case where if the task is currently blocked in the kernel waiting on a condition to become true, it won't process task_work. Even though the task is woken, it just checks whatever condition it's waiting on, and goes back to sleep if it's still false.

This is a problem if that very condition only becomes true when that task_work is run. An example of that is the task registering an eventfd with io_uring, and it's now blocked waiting on an eventfd read. That read could depend on a completion event, and that completion event won't get triggered until task_work has been run.

Use the TWA_SIGNAL notification for task_work, so that we ensure the task always runs the work when queued.

Cc: [email protected] # v5.7
Signed-off-by: Jens Axboe <[email protected]>
1 parent e91b481 commit ce593a6
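
The commit message's eventfd example is easiest to see from userspace. Below is a minimal sketch of that scenario, assuming liburing is available; the exact flow (a read from stdin, a single registered eventfd) is illustrative only and is not part of this commit. On an affected kernel (v5.7 up to this fix), the final read(2) on the eventfd could block forever, because the completion that would signal it required task_work that the blocked task never ran.

/*
 * Hypothetical reproducer sketch, not from the commit: register an
 * eventfd with an io_uring instance, submit a request, then block
 * reading the eventfd while waiting for the completion to signal it.
 */
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	uint64_t ev;
	char buf[64];
	int evfd;

	if (io_uring_queue_init(8, &ring, 0) < 0) {
		perror("io_uring_queue_init");
		return 1;
	}

	evfd = eventfd(0, 0);
	if (evfd < 0) {
		perror("eventfd");
		return 1;
	}

	/* completions posted to this ring now signal evfd */
	if (io_uring_register_eventfd(&ring, evfd) < 0) {
		perror("io_uring_register_eventfd");
		return 1;
	}

	/* a request whose completion may be posted via task_work */
	sqe = io_uring_get_sqe(&ring);
	if (!sqe)
		return 1;
	io_uring_prep_read(sqe, STDIN_FILENO, buf, sizeof(buf), 0);
	io_uring_submit(&ring);

	/*
	 * Block until the eventfd fires. Before TWA_SIGNAL, the wakeup
	 * did not force the task to run its queued task_work: it saw the
	 * eventfd count still zero and went back to sleep.
	 */
	if (read(evfd, &ev, sizeof(ev)) == sizeof(ev))
		printf("completion signalled, count=%llu\n",
		       (unsigned long long)ev);

	io_uring_queue_exit(&ring);
	close(evfd);
	return 0;
}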

File tree

1 file changed (+24, -8)

fs/io_uring.c

Lines changed: 24 additions & 8 deletions
@@ -4072,6 +4072,21 @@ struct io_poll_table {
 	int error;
 };
 
+static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb,
+				int notify)
+{
+	struct task_struct *tsk = req->task;
+	int ret;
+
+	if (req->ctx->flags & IORING_SETUP_SQPOLL)
+		notify = 0;
+
+	ret = task_work_add(tsk, cb, notify);
+	if (!ret)
+		wake_up_process(tsk);
+	return ret;
+}
+
 static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 			   __poll_t mask, task_work_func_t func)
 {
@@ -4095,13 +4110,13 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 	 * of executing it. We can't safely execute it anyway, as we may not
 	 * have the needed state needed for it anyway.
 	 */
-	ret = task_work_add(tsk, &req->task_work, true);
+	ret = io_req_task_work_add(req, &req->task_work, TWA_SIGNAL);
 	if (unlikely(ret)) {
 		WRITE_ONCE(poll->canceled, true);
 		tsk = io_wq_get_task(req->ctx->io_wq);
-		task_work_add(tsk, &req->task_work, true);
+		task_work_add(tsk, &req->task_work, 0);
+		wake_up_process(tsk);
 	}
-	wake_up_process(tsk);
 	return 1;
 }
 
@@ -6182,19 +6197,20 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	do {
 		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
 						TASK_INTERRUPTIBLE);
+		/* make sure we run task_work before checking for signals */
 		if (current->task_works)
 			task_work_run();
-		if (io_should_wake(&iowq, false))
-			break;
-		schedule();
 		if (signal_pending(current)) {
-			ret = -EINTR;
+			ret = -ERESTARTSYS;
 			break;
 		}
+		if (io_should_wake(&iowq, false))
+			break;
+		schedule();
 	} while (1);
 	finish_wait(&ctx->wait, &iowq.wq);
 
-	restore_saved_sigmask_unless(ret == -EINTR);
+	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
 
 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
