Skip to content

Commit 71a8538

Browse files
committed
io-wq: check for wq exit after adding new worker task_work
We check IO_WQ_BIT_EXIT before attempting to create a new worker, and wq exit cancels pending work if we have any. But it's possible to have a race between the two, where creation checks the exit bit and finds it not set, but we're in the process of exiting. The exit side will cancel pending creation task_work, but there's a gap where we add task_work after we've canceled existing creations at exit time.

Fix this by checking the EXIT bit after adding the creation task_work. If it's set, run the same cancelation that exit does.

Reported-and-tested-by: [email protected]
Reviewed-by: Hao Xu <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
1 parent 78a7806 commit 71a8538

File tree

1 file changed

+25
-6
lines changed

1 file changed

+25
-6
lines changed

fs/io-wq.c

Lines changed: 25 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,7 @@ static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
142142
struct io_wqe_acct *acct,
143143
struct io_cb_cancel_data *match);
144144
static void create_worker_cb(struct callback_head *cb);
145+
static void io_wq_cancel_tw_create(struct io_wq *wq);
145146

146147
static bool io_worker_get(struct io_worker *worker)
147148
{
@@ -357,10 +358,22 @@ static bool io_queue_worker_create(struct io_worker *worker,
357358
test_and_set_bit_lock(0, &worker->create_state))
358359
goto fail_release;
359360

361+
atomic_inc(&wq->worker_refs);
360362
init_task_work(&worker->create_work, func);
361363
worker->create_index = acct->index;
362-
if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
364+
if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) {
365+
/*
366+
* EXIT may have been set after checking it above, check after
367+
* adding the task_work and remove any creation item if it is
368+
* now set. wq exit does that too, but we can have added this
369+
* work item after we canceled in io_wq_exit_workers().
370+
*/
371+
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
372+
io_wq_cancel_tw_create(wq);
373+
io_worker_ref_put(wq);
363374
return true;
375+
}
376+
io_worker_ref_put(wq);
364377
clear_bit_unlock(0, &worker->create_state);
365378
fail_release:
366379
io_worker_release(worker);
@@ -1196,20 +1209,26 @@ void io_wq_exit_start(struct io_wq *wq)
11961209
set_bit(IO_WQ_BIT_EXIT, &wq->state);
11971210
}
11981211

1199-
static void io_wq_exit_workers(struct io_wq *wq)
1212+
static void io_wq_cancel_tw_create(struct io_wq *wq)
12001213
{
12011214
struct callback_head *cb;
1202-
int node;
1203-
1204-
if (!wq->task)
1205-
return;
12061215

12071216
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
12081217
struct io_worker *worker;
12091218

12101219
worker = container_of(cb, struct io_worker, create_work);
12111220
io_worker_cancel_cb(worker);
12121221
}
1222+
}
1223+
1224+
static void io_wq_exit_workers(struct io_wq *wq)
1225+
{
1226+
int node;
1227+
1228+
if (!wq->task)
1229+
return;
1230+
1231+
io_wq_cancel_tw_create(wq);
12131232

12141233
rcu_read_lock();
12151234
for_each_node(node) {

0 commit comments

Comments
 (0)