Skip to content

Commit 7563439

Browse files
committed
io-wq: don't call kXalloc_node() with non-online node
Glauber reports a crash on init on a box he has:

 RIP: 0010:__alloc_pages_nodemask+0x132/0x340
 Code: 18 01 75 04 41 80 ce 80 89 e8 48 8b 54 24 08 8b 74 24 1c c1 e8 0c 48 8b 3c 24 83 e0 01 88 44 24 20 48 85 d2 0f 85 74 01 00 00 <3b> 77 08 0f 82 6b 01 00 00 48 89 7c 24 10 89 ea 48 8b 07 b9 00 02
 RSP: 0018:ffffb8be4d0b7c28 EFLAGS: 00010246
 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000000000e8e8
 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000002080
 RBP: 0000000000012cc0 R08: 0000000000000000 R09: 0000000000000002
 R10: 0000000000000dc0 R11: ffff995c60400100 R12: 0000000000000000
 R13: 0000000000012cc0 R14: 0000000000000001 R15: ffff995c60db00f0
 FS: 00007f4d115ca900(0000) GS:ffff995c60d80000(0000) knlGS:0000000000000000
 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000002088 CR3: 00000017cca66002 CR4: 00000000007606e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 PKRU: 55555554
 Call Trace:
  alloc_slab_page+0x46/0x320
  new_slab+0x9d/0x4e0
  ___slab_alloc+0x507/0x6a0
  ? io_wq_create+0xb4/0x2a0
  __slab_alloc+0x1c/0x30
  kmem_cache_alloc_node_trace+0xa6/0x260
  io_wq_create+0xb4/0x2a0
  io_uring_setup+0x97f/0xaa0
  ? io_remove_personalities+0x30/0x30
  ? io_poll_trigger_evfd+0x30/0x30
  do_syscall_64+0x5b/0x1c0
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f4d116cb1ed

which is due to the 'wqe' and 'worker' allocation being node affine.
But it isn't valid to call the node affine allocation if the node isn't
online.

Setup structures for even offline nodes, as usual, but skip them in
terms of thread setup to not waste resources. If the node isn't online,
just alloc memory with NUMA_NO_NODE.

Reported-by: Glauber Costa <[email protected]>
Tested-by: Glauber Costa <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
1 parent b537916 commit 7563439

File tree

1 file changed

+18
-4
lines changed

1 file changed

+18
-4
lines changed

fs/io-wq.c

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -699,18 +699,26 @@ static int io_wq_manager(void *data)
699699
/* create fixed workers */
700700
refcount_set(&wq->refs, workers_to_create);
701701
for_each_node(node) {
702+
if (!node_online(node))
703+
continue;
702704
if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
703705
goto err;
704706
workers_to_create--;
705707
}
706708

709+
while (workers_to_create--)
710+
refcount_dec(&wq->refs);
711+
707712
complete(&wq->done);
708713

709714
while (!kthread_should_stop()) {
710715
for_each_node(node) {
711716
struct io_wqe *wqe = wq->wqes[node];
712717
bool fork_worker[2] = { false, false };
713718

719+
if (!node_online(node))
720+
continue;
721+
714722
spin_lock_irq(&wqe->lock);
715723
if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
716724
fork_worker[IO_WQ_ACCT_BOUND] = true;
@@ -829,7 +837,9 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
829837

830838
list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
831839
if (io_worker_get(worker)) {
832-
ret = func(worker, data);
840+
/* no task if node is/was offline */
841+
if (worker->task)
842+
ret = func(worker, data);
833843
io_worker_release(worker);
834844
if (ret)
835845
break;
@@ -1084,6 +1094,8 @@ void io_wq_flush(struct io_wq *wq)
10841094
for_each_node(node) {
10851095
struct io_wqe *wqe = wq->wqes[node];
10861096

1097+
if (!node_online(node))
1098+
continue;
10871099
init_completion(&data.done);
10881100
INIT_IO_WORK(&data.work, io_wq_flush_func);
10891101
data.work.flags |= IO_WQ_WORK_INTERNAL;
@@ -1115,20 +1127,22 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
11151127

11161128
for_each_node(node) {
11171129
struct io_wqe *wqe;
1130+
int alloc_node = node;
11181131

1119-
wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, node);
1132+
if (!node_online(alloc_node))
1133+
alloc_node = NUMA_NO_NODE;
1134+
wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
11201135
if (!wqe)
11211136
goto err;
11221137
wq->wqes[node] = wqe;
1123-
wqe->node = node;
1138+
wqe->node = alloc_node;
11241139
wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
11251140
atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
11261141
if (wq->user) {
11271142
wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
11281143
task_rlimit(current, RLIMIT_NPROC);
11291144
}
11301145
atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
1131-
wqe->node = node;
11321146
wqe->wq = wq;
11331147
spin_lock_init(&wqe->lock);
11341148
INIT_WQ_LIST(&wqe->work_list);

0 commit comments

Comments
 (0)