
Commit ca60ad6

Merge tag 'io_uring-5.6-2020-02-14' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:
 "Here's a set of fixes for io_uring:

   - Various fixes with cleanups from Pavel, fixing corner cases where
     we're not correctly dealing with iovec cleanup.

   - Clarify that statx/openat/openat2 don't accept fixed files

   - Buffered raw device write EOPNOTSUPP fix

   - Ensure async workers grab current->fs

   - A few task exit fixes with pending requests that grab the file table

   - send/recvmsg async load fix

   - io-wq offline node setup fix

   - CQ overflow flush in poll"

* tag 'io_uring-5.6-2020-02-14' of git://git.kernel.dk/linux-block: (21 commits)
  io_uring: prune request from overflow list on flush
  io-wq: don't call kXalloc_node() with non-online node
  io_uring: retain sockaddr_storage across send/recvmsg async punt
  io_uring: cancel pending async work if task exits
  io-wq: add io_wq_cancel_pid() to cancel based on a specific pid
  io-wq: make io_wqe_cancel_work() take a match handler
  io_uring: fix openat/statx's filename leak
  io_uring: fix double prep iovec leak
  io_uring: fix async close() with f_op->flush()
  io_uring: allow AT_FDCWD for non-file openat/openat2/statx
  io_uring: grab ->fs as part of async preparation
  io-wq: add support for inheriting ->fs
  io_uring: retry raw bdev writes if we hit -EOPNOTSUPP
  io_uring: add cleanup for openat()/statx()
  io_uring: fix iovec leaks
  io_uring: remove unused struct io_async_open
  io_uring: flush overflowed CQ events in the io_uring_poll()
  io_uring: statx/openat/openat2 don't support fixed files
  io_uring: fix deferred req iovec leak
  io_uring: fix 1-bit bitfields to be unsigned
  ...
2 parents: 2019fc9 + 2ca1025

3 files changed: +284 additions, -113 deletions


fs/io-wq.c

Lines changed: 77 additions & 15 deletions
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/kthread.h>
 #include <linux/rculist_nulls.h>
+#include <linux/fs_struct.h>
 
 #include "io-wq.h"
 
@@ -59,6 +60,7 @@ struct io_worker {
 	const struct cred *cur_creds;
 	const struct cred *saved_creds;
 	struct files_struct *restore_files;
+	struct fs_struct *restore_fs;
 };
 
 #if BITS_PER_LONG == 64
@@ -151,6 +153,9 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
 		task_unlock(current);
 	}
 
+	if (current->fs != worker->restore_fs)
+		current->fs = worker->restore_fs;
+
 	/*
 	 * If we have an active mm, we need to drop the wq lock before unusing
 	 * it. If we do, return true and let the caller retry the idle loop.
@@ -311,6 +316,7 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
 
 	worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
 	worker->restore_files = current->files;
+	worker->restore_fs = current->fs;
 	io_wqe_inc_running(wqe, worker);
 }
 
@@ -481,6 +487,8 @@ static void io_worker_handle_work(struct io_worker *worker)
 			current->files = work->files;
 			task_unlock(current);
 		}
+		if (work->fs && current->fs != work->fs)
+			current->fs = work->fs;
 		if (work->mm != worker->mm)
 			io_wq_switch_mm(worker, work);
 		if (worker->cur_creds != work->creds)
@@ -691,18 +699,26 @@ static int io_wq_manager(void *data)
 	/* create fixed workers */
 	refcount_set(&wq->refs, workers_to_create);
 	for_each_node(node) {
+		if (!node_online(node))
+			continue;
 		if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
 			goto err;
 		workers_to_create--;
 	}
 
+	while (workers_to_create--)
+		refcount_dec(&wq->refs);
+
 	complete(&wq->done);
 
 	while (!kthread_should_stop()) {
 		for_each_node(node) {
 			struct io_wqe *wqe = wq->wqes[node];
 			bool fork_worker[2] = { false, false };
 
+			if (!node_online(node))
+				continue;
+
 			spin_lock_irq(&wqe->lock);
 			if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
 				fork_worker[IO_WQ_ACCT_BOUND] = true;
@@ -821,7 +837,9 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
 
 	list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
 		if (io_worker_get(worker)) {
-			ret = func(worker, data);
+			/* no task if node is/was offline */
+			if (worker->task)
+				ret = func(worker, data);
 			io_worker_release(worker);
 			if (ret)
 				break;
@@ -929,17 +947,19 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
 	return ret;
 }
 
+struct work_match {
+	bool (*fn)(struct io_wq_work *, void *data);
+	void *data;
+};
+
 static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 {
-	struct io_wq_work *work = data;
+	struct work_match *match = data;
 	unsigned long flags;
 	bool ret = false;
 
-	if (worker->cur_work != work)
-		return false;
-
 	spin_lock_irqsave(&worker->lock, flags);
-	if (worker->cur_work == work &&
+	if (match->fn(worker->cur_work, match->data) &&
 	    !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL)) {
 		send_sig(SIGINT, worker->task, 1);
 		ret = true;
@@ -950,15 +970,13 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 }
 
 static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
-					    struct io_wq_work *cwork)
+					    struct work_match *match)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work;
 	unsigned long flags;
 	bool found = false;
 
-	cwork->flags |= IO_WQ_WORK_CANCEL;
-
 	/*
 	 * First check pending list, if we're lucky we can just remove it
 	 * from there. CANCEL_OK means that the work is returned as-new,
@@ -968,7 +986,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
 	wq_list_for_each(node, prev, &wqe->work_list) {
 		work = container_of(node, struct io_wq_work, list);
 
-		if (work == cwork) {
+		if (match->fn(work, match->data)) {
 			wq_node_del(&wqe->work_list, node, prev);
 			found = true;
 			break;
@@ -989,20 +1007,60 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
 	 * completion will run normally in this case.
 	 */
 	rcu_read_lock();
-	found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, cwork);
+	found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, match);
 	rcu_read_unlock();
 	return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
 }
 
+static bool io_wq_work_match(struct io_wq_work *work, void *data)
+{
+	return work == data;
+}
+
 enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
 {
+	struct work_match match = {
+		.fn	= io_wq_work_match,
+		.data	= cwork
+	};
 	enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
 	int node;
 
+	cwork->flags |= IO_WQ_WORK_CANCEL;
+
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
 
-		ret = io_wqe_cancel_work(wqe, cwork);
+		ret = io_wqe_cancel_work(wqe, &match);
+		if (ret != IO_WQ_CANCEL_NOTFOUND)
+			break;
+	}
+
+	return ret;
+}
+
+static bool io_wq_pid_match(struct io_wq_work *work, void *data)
+{
+	pid_t pid = (pid_t) (unsigned long) data;
+
+	if (work)
+		return work->task_pid == pid;
+	return false;
+}
+
+enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid)
+{
+	struct work_match match = {
+		.fn	= io_wq_pid_match,
+		.data	= (void *) (unsigned long) pid
+	};
+	enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
+	int node;
+
+	for_each_node(node) {
+		struct io_wqe *wqe = wq->wqes[node];
+
+		ret = io_wqe_cancel_work(wqe, &match);
 		if (ret != IO_WQ_CANCEL_NOTFOUND)
 			break;
 	}
@@ -1036,6 +1094,8 @@ void io_wq_flush(struct io_wq *wq)
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
 
+		if (!node_online(node))
+			continue;
 		init_completion(&data.done);
 		INIT_IO_WORK(&data.work, io_wq_flush_func);
 		data.work.flags |= IO_WQ_WORK_INTERNAL;
@@ -1067,20 +1127,22 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 
 	for_each_node(node) {
 		struct io_wqe *wqe;
+		int alloc_node = node;
 
-		wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, node);
+		if (!node_online(alloc_node))
+			alloc_node = NUMA_NO_NODE;
+		wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
 		if (!wqe)
 			goto err;
 		wq->wqes[node] = wqe;
-		wqe->node = node;
+		wqe->node = alloc_node;
 		wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
 		atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
 		if (wq->user) {
 			wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
 					task_rlimit(current, RLIMIT_NPROC);
 		}
 		atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
-		wqe->node = node;
 		wqe->wq = wq;
 		spin_lock_init(&wqe->lock);
 		INIT_WQ_LIST(&wqe->work_list);

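The centerpiece of the fs/io-wq.c changes above is the switch from pointer-equality cancellation to a generic match handler: io_wqe_cancel_work() now takes a struct work_match, so the same pending-list and running-worker walks serve both io_wq_cancel_work() (match by pointer) and the new io_wq_cancel_pid() (match by owning pid). Below is a minimal standalone sketch of that pattern, for illustration only; struct work, match_ptr, match_pid, and cancel_matching are hypothetical userspace names, not kernel code.

/*
 * Standalone illustration of the match-handler pattern; hypothetical
 * userspace names throughout. Build with: cc -o match match.c
 */
#include <stdbool.h>
#include <stdio.h>

struct work {
	int task_pid;
};

/* same shape as the kernel's struct work_match above */
struct work_match {
	bool (*fn)(struct work *, void *data);
	void *data;
};

/* match one specific item by pointer, like io_wq_work_match() */
static bool match_ptr(struct work *w, void *data)
{
	return w == data;
}

/* match any item owned by a pid, like io_wq_pid_match() */
static bool match_pid(struct work *w, void *data)
{
	int pid = (int) (unsigned long) data;

	return w && w->task_pid == pid;
}

/* one traversal serves every policy the match handler can express */
static int cancel_matching(struct work **list, int n, struct work_match *m)
{
	int cancelled = 0;

	for (int i = 0; i < n; i++) {
		if (m->fn(list[i], m->data))
			cancelled++;	/* the kernel would dequeue or signal here */
	}
	return cancelled;
}

int main(void)
{
	struct work a = { .task_pid = 42 }, b = { .task_pid = 7 };
	struct work *list[] = { &a, &b };
	struct work_match by_pid = {
		.fn = match_pid,
		.data = (void *) (unsigned long) 42,
	};
	struct work_match by_ptr = { .fn = match_ptr, .data = &b };

	printf("by pid: %d cancelled\n", cancel_matching(list, 2, &by_pid));
	printf("by ptr: %d cancelled\n", cancel_matching(list, 2, &by_ptr));
	return 0;
}

The design mirrors the work_cancel_fn callback that io_wq_cancel_cb() already uses: bundling the predicate with its argument lets a new cancellation policy reuse one traversal instead of duplicating the list and worker walks.
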
fs/io-wq.h

Lines changed: 5 additions & 1 deletion
@@ -74,17 +74,20 @@ struct io_wq_work {
 	struct files_struct *files;
 	struct mm_struct *mm;
 	const struct cred *creds;
+	struct fs_struct *fs;
 	unsigned flags;
+	pid_t task_pid;
 };
 
 #define INIT_IO_WORK(work, _func)			\
 	do {						\
 		(work)->list.next = NULL;		\
 		(work)->func = _func;			\
-		(work)->flags = 0;			\
 		(work)->files = NULL;			\
 		(work)->mm = NULL;			\
 		(work)->creds = NULL;			\
+		(work)->fs = NULL;			\
+		(work)->flags = 0;			\
 	} while (0)					\
 
 typedef void (get_work_fn)(struct io_wq_work *);
@@ -107,6 +110,7 @@ void io_wq_flush(struct io_wq *wq);
 
 void io_wq_cancel_all(struct io_wq *wq);
 enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
+enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid);
 
 typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
 

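The io_wq_cancel_pid() prototype above pairs with the new task_pid field in struct io_wq_work; per the merge summary it backs the fix that cancels pending async work when a task exits. A hedged sketch of how such a caller might look follows; the function name is hypothetical, and the real caller lives in fs/io_uring.c, which is not part of this excerpt. Only the io-wq.h prototypes and the return-value semantics documented in the diff are assumed.

/*
 * Hypothetical caller, for illustration only: cancel async work queued
 * by the exiting task, keyed by the pid stored in work->task_pid.
 */
static void io_cancel_exiting_task_work(struct io_wq *wq)
{
	enum io_wq_cancel ret;

	ret = io_wq_cancel_pid(wq, task_pid_vnr(current));
	switch (ret) {
	case IO_WQ_CANCEL_OK:		/* pending work removed, returned as-new */
	case IO_WQ_CANCEL_RUNNING:	/* a running worker was signalled with SIGINT */
	case IO_WQ_CANCEL_NOTFOUND:	/* nothing queued by this pid */
		break;
	}
}
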