Skip to content

Commit 17a9105

Browse files
isilence authored and axboe committed
io_uring/io-wq: close io-wq full-stop gap
There is an old problem with io-wq cancellation where requests that should be killed are in io-wq but are not discoverable, e.g. in the @next_hashed or @linked vars of io_worker_handle_work(). It adds some unreliability to individual request cancellation, but may also potentially get __io_uring_cancel() stuck. For instance: 1) An __io_uring_cancel() cancellation round has not found any requests, but there are some as described. 2) __io_uring_cancel() goes to sleep. 3) Then workers wake up and try to execute those hidden requests, which happen to be unbound. As we already cancel all requests of io-wq there, set IO_WQ_BIT_EXIT in advance, thus preventing 3) from executing unbound requests. The workers will initially break looping because of getting a signal, as they are threads of the dying/exec()'ing user task. Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Link: https://lore.kernel.org/r/abfcf8c54cb9e8f7bfbad7e9a0cc5433cc70bdc2.1621781238.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent ba5ef6d commit 17a9105

File tree

3 files changed

+16
-12
lines changed

3 files changed

+16
-12
lines changed

fs/io-wq.c

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -979,13 +979,16 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
979979
return cwd->wqe->wq == data;
980980
}
981981

982+
void io_wq_exit_start(struct io_wq *wq)
983+
{
984+
set_bit(IO_WQ_BIT_EXIT, &wq->state);
985+
}
986+
982987
static void io_wq_exit_workers(struct io_wq *wq)
983988
{
984989
struct callback_head *cb;
985990
int node;
986991

987-
set_bit(IO_WQ_BIT_EXIT, &wq->state);
988-
989992
if (!wq->task)
990993
return;
991994

@@ -1020,8 +1023,6 @@ static void io_wq_destroy(struct io_wq *wq)
10201023

10211024
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
10221025

1023-
io_wq_exit_workers(wq);
1024-
10251026
for_each_node(node) {
10261027
struct io_wqe *wqe = wq->wqes[node];
10271028
struct io_cb_cancel_data match = {
@@ -1036,16 +1037,13 @@ static void io_wq_destroy(struct io_wq *wq)
10361037
kfree(wq);
10371038
}
10381039

1039-
void io_wq_put(struct io_wq *wq)
1040-
{
1041-
if (refcount_dec_and_test(&wq->refs))
1042-
io_wq_destroy(wq);
1043-
}
1044-
10451040
void io_wq_put_and_exit(struct io_wq *wq)
10461041
{
1042+
WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));
1043+
10471044
io_wq_exit_workers(wq);
1048-
io_wq_put(wq);
1045+
if (refcount_dec_and_test(&wq->refs))
1046+
io_wq_destroy(wq);
10491047
}
10501048

10511049
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)

fs/io-wq.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ struct io_wq_data {
122122
};
123123

124124
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
125-
void io_wq_put(struct io_wq *wq);
125+
void io_wq_exit_start(struct io_wq *wq);
126126
void io_wq_put_and_exit(struct io_wq *wq);
127127

128128
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);

fs/io_uring.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9078,6 +9078,9 @@ static void io_uring_cancel_sqpoll(struct io_sq_data *sqd)
90789078

90799079
if (!current->io_uring)
90809080
return;
9081+
if (tctx->io_wq)
9082+
io_wq_exit_start(tctx->io_wq);
9083+
90819084
WARN_ON_ONCE(!sqd || sqd->thread != current);
90829085

90839086
atomic_inc(&tctx->in_idle);
@@ -9112,6 +9115,9 @@ void __io_uring_cancel(struct files_struct *files)
91129115
DEFINE_WAIT(wait);
91139116
s64 inflight;
91149117

9118+
if (tctx->io_wq)
9119+
io_wq_exit_start(tctx->io_wq);
9120+
91159121
/* make sure overflow events are dropped */
91169122
atomic_inc(&tctx->in_idle);
91179123
do {

0 commit comments

Comments
 (0)