Skip to content

Commit 42995ce

Browse files
committed
Merge tag 'io_uring-5.14-2021-08-13' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:
 "A bit bigger than the previous weeks, but mostly just a few stable
  bound fixes. In detail:

   - Followup fixes to patches from last week for io-wq, turns out they
     weren't complete (Hao)

   - Two lockdep reported fixes out of the RT camp (me)

   - Sync the io_uring-cp example with liburing, as a few bug fixes
     never made it to the kernel carried version (me)

   - SQPOLL related TIF_NOTIFY_SIGNAL fix (Nadav)

   - Use WRITE_ONCE() when writing sq flags (Nadav)

   - io_rsrc_put_work() deadlock fix (Pavel)"

* tag 'io_uring-5.14-2021-08-13' of git://git.kernel.dk/linux-block:
  tools/io_uring/io_uring-cp: sync with liburing example
  io_uring: fix ctx-exit io_rsrc_put_work() deadlock
  io_uring: drop ctx->uring_lock before flushing work item
  io-wq: fix IO_WORKER_F_FIXED issue in create_io_worker()
  io-wq: fix bug of creating io-wokers unconditionally
  io_uring: rsrc ref lock needs to be IRQ safe
  io_uring: Use WRITE_ONCE() when writing to sq_flags
  io_uring: clear TIF_NOTIFY_SIGNAL when running task work
2 parents 462938c + 8f40d03 commit 42995ce

File tree

3 files changed

+75
-40
lines changed

3 files changed

+75
-40
lines changed

fs/io-wq.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ struct io_cb_cancel_data {
129129
bool cancel_all;
130130
};
131131

132-
static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
132+
static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first);
133133
static void io_wqe_dec_running(struct io_worker *worker);
134134

135135
static bool io_worker_get(struct io_worker *worker)
@@ -248,18 +248,20 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
248248
rcu_read_unlock();
249249

250250
if (!ret) {
251-
bool do_create = false;
251+
bool do_create = false, first = false;
252252

253253
raw_spin_lock_irq(&wqe->lock);
254254
if (acct->nr_workers < acct->max_workers) {
255255
atomic_inc(&acct->nr_running);
256256
atomic_inc(&wqe->wq->worker_refs);
257+
if (!acct->nr_workers)
258+
first = true;
257259
acct->nr_workers++;
258260
do_create = true;
259261
}
260262
raw_spin_unlock_irq(&wqe->lock);
261263
if (do_create)
262-
create_io_worker(wqe->wq, wqe, acct->index);
264+
create_io_worker(wqe->wq, wqe, acct->index, first);
263265
}
264266
}
265267

@@ -282,16 +284,26 @@ static void create_worker_cb(struct callback_head *cb)
282284
struct io_wq *wq;
283285
struct io_wqe *wqe;
284286
struct io_wqe_acct *acct;
287+
bool do_create = false, first = false;
285288

286289
cwd = container_of(cb, struct create_worker_data, work);
287290
wqe = cwd->wqe;
288291
wq = wqe->wq;
289292
acct = &wqe->acct[cwd->index];
290293
raw_spin_lock_irq(&wqe->lock);
291-
if (acct->nr_workers < acct->max_workers)
294+
if (acct->nr_workers < acct->max_workers) {
295+
if (!acct->nr_workers)
296+
first = true;
292297
acct->nr_workers++;
298+
do_create = true;
299+
}
293300
raw_spin_unlock_irq(&wqe->lock);
294-
create_io_worker(wq, cwd->wqe, cwd->index);
301+
if (do_create) {
302+
create_io_worker(wq, wqe, cwd->index, first);
303+
} else {
304+
atomic_dec(&acct->nr_running);
305+
io_worker_ref_put(wq);
306+
}
295307
kfree(cwd);
296308
}
297309

@@ -629,7 +641,7 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
629641
raw_spin_unlock_irq(&worker->wqe->lock);
630642
}
631643

632-
static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
644+
static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first)
633645
{
634646
struct io_wqe_acct *acct = &wqe->acct[index];
635647
struct io_worker *worker;
@@ -670,7 +682,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
670682
worker->flags |= IO_WORKER_F_FREE;
671683
if (index == IO_WQ_ACCT_BOUND)
672684
worker->flags |= IO_WORKER_F_BOUND;
673-
if ((acct->nr_workers == 1) && (worker->flags & IO_WORKER_F_BOUND))
685+
if (first && (worker->flags & IO_WORKER_F_BOUND))
674686
worker->flags |= IO_WORKER_F_FIXED;
675687
raw_spin_unlock_irq(&wqe->lock);
676688
wake_up_new_task(tsk);

fs/io_uring.c

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
#include <linux/task_work.h>
7979
#include <linux/pagemap.h>
8080
#include <linux/io_uring.h>
81+
#include <linux/tracehook.h>
8182

8283
#define CREATE_TRACE_POINTS
8384
#include <trace/events/io_uring.h>
@@ -1499,7 +1500,8 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
14991500
all_flushed = list_empty(&ctx->cq_overflow_list);
15001501
if (all_flushed) {
15011502
clear_bit(0, &ctx->check_cq_overflow);
1502-
ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
1503+
WRITE_ONCE(ctx->rings->sq_flags,
1504+
ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW);
15031505
}
15041506

15051507
if (posted)
@@ -1578,7 +1580,9 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
15781580
}
15791581
if (list_empty(&ctx->cq_overflow_list)) {
15801582
set_bit(0, &ctx->check_cq_overflow);
1581-
ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
1583+
WRITE_ONCE(ctx->rings->sq_flags,
1584+
ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW);
1585+
15821586
}
15831587
ocqe->cqe.user_data = user_data;
15841588
ocqe->cqe.res = res;
@@ -2222,9 +2226,9 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
22222226

22232227
static inline bool io_run_task_work(void)
22242228
{
2225-
if (current->task_works) {
2229+
if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
22262230
__set_current_state(TASK_RUNNING);
2227-
task_work_run();
2231+
tracehook_notify_signal();
22282232
return true;
22292233
}
22302234

@@ -6803,14 +6807,16 @@ static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
68036807
{
68046808
/* Tell userspace we may need a wakeup call */
68056809
spin_lock_irq(&ctx->completion_lock);
6806-
ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
6810+
WRITE_ONCE(ctx->rings->sq_flags,
6811+
ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP);
68076812
spin_unlock_irq(&ctx->completion_lock);
68086813
}
68096814

68106815
static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
68116816
{
68126817
spin_lock_irq(&ctx->completion_lock);
6813-
ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
6818+
WRITE_ONCE(ctx->rings->sq_flags,
6819+
ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP);
68146820
spin_unlock_irq(&ctx->completion_lock);
68156821
}
68166822

@@ -7132,16 +7138,6 @@ static void **io_alloc_page_table(size_t size)
71327138
return table;
71337139
}
71347140

7135-
static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx)
7136-
{
7137-
spin_lock_bh(&ctx->rsrc_ref_lock);
7138-
}
7139-
7140-
static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx)
7141-
{
7142-
spin_unlock_bh(&ctx->rsrc_ref_lock);
7143-
}
7144-
71457141
static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
71467142
{
71477143
percpu_ref_exit(&ref_node->refs);
@@ -7158,9 +7154,9 @@ static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
71587154
struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
71597155

71607156
rsrc_node->rsrc_data = data_to_kill;
7161-
io_rsrc_ref_lock(ctx);
7157+
spin_lock_irq(&ctx->rsrc_ref_lock);
71627158
list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
7163-
io_rsrc_ref_unlock(ctx);
7159+
spin_unlock_irq(&ctx->rsrc_ref_lock);
71647160

71657161
atomic_inc(&data_to_kill->refs);
71667162
percpu_ref_kill(&rsrc_node->refs);
@@ -7199,17 +7195,19 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
71997195
/* kill initial ref, already quiesced if zero */
72007196
if (atomic_dec_and_test(&data->refs))
72017197
break;
7198+
mutex_unlock(&ctx->uring_lock);
72027199
flush_delayed_work(&ctx->rsrc_put_work);
72037200
ret = wait_for_completion_interruptible(&data->done);
7204-
if (!ret)
7201+
if (!ret) {
7202+
mutex_lock(&ctx->uring_lock);
72057203
break;
7204+
}
72067205

72077206
atomic_inc(&data->refs);
72087207
/* wait for all works potentially completing data->done */
72097208
flush_delayed_work(&ctx->rsrc_put_work);
72107209
reinit_completion(&data->done);
72117210

7212-
mutex_unlock(&ctx->uring_lock);
72137211
ret = io_run_task_work_sig();
72147212
mutex_lock(&ctx->uring_lock);
72157213
} while (ret >= 0);
@@ -7668,9 +7666,10 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
76687666
{
76697667
struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
76707668
struct io_ring_ctx *ctx = node->rsrc_data->ctx;
7669+
unsigned long flags;
76717670
bool first_add = false;
76727671

7673-
io_rsrc_ref_lock(ctx);
7672+
spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
76747673
node->done = true;
76757674

76767675
while (!list_empty(&ctx->rsrc_ref_list)) {
@@ -7682,7 +7681,7 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
76827681
list_del(&node->node);
76837682
first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
76847683
}
7685-
io_rsrc_ref_unlock(ctx);
7684+
spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
76867685

76877686
if (first_add)
76887687
mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
@@ -8653,13 +8652,10 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
86538652
mutex_unlock(&ctx->uring_lock);
86548653
}
86558654

8656-
static bool io_wait_rsrc_data(struct io_rsrc_data *data)
8655+
static void io_wait_rsrc_data(struct io_rsrc_data *data)
86578656
{
8658-
if (!data)
8659-
return false;
8660-
if (!atomic_dec_and_test(&data->refs))
8657+
if (data && !atomic_dec_and_test(&data->refs))
86618658
wait_for_completion(&data->done);
8662-
return true;
86638659
}
86648660

86658661
static void io_ring_ctx_free(struct io_ring_ctx *ctx)
@@ -8671,10 +8667,14 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
86718667
ctx->mm_account = NULL;
86728668
}
86738669

8670+
/* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
8671+
io_wait_rsrc_data(ctx->buf_data);
8672+
io_wait_rsrc_data(ctx->file_data);
8673+
86748674
mutex_lock(&ctx->uring_lock);
8675-
if (io_wait_rsrc_data(ctx->buf_data))
8675+
if (ctx->buf_data)
86768676
__io_sqe_buffers_unregister(ctx);
8677-
if (io_wait_rsrc_data(ctx->file_data))
8677+
if (ctx->file_data)
86788678
__io_sqe_files_unregister(ctx);
86798679
if (ctx->rings)
86808680
__io_cqring_overflow_flush(ctx, true);

tools/io_uring/io_uring-cp.c

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
131131
writes = reads = offset = 0;
132132

133133
while (insize || write_left) {
134-
unsigned long had_reads;
135-
int got_comp;
134+
int had_reads, got_comp;
136135

137136
/*
138137
* Queue up as many reads as we can
@@ -174,8 +173,13 @@ static int copy_file(struct io_uring *ring, off_t insize)
174173
if (!got_comp) {
175174
ret = io_uring_wait_cqe(ring, &cqe);
176175
got_comp = 1;
177-
} else
176+
} else {
178177
ret = io_uring_peek_cqe(ring, &cqe);
178+
if (ret == -EAGAIN) {
179+
cqe = NULL;
180+
ret = 0;
181+
}
182+
}
179183
if (ret < 0) {
180184
fprintf(stderr, "io_uring_peek_cqe: %s\n",
181185
strerror(-ret));
@@ -194,7 +198,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
194198
fprintf(stderr, "cqe failed: %s\n",
195199
strerror(-cqe->res));
196200
return 1;
197-
} else if ((size_t) cqe->res != data->iov.iov_len) {
201+
} else if (cqe->res != data->iov.iov_len) {
198202
/* Short read/write, adjust and requeue */
199203
data->iov.iov_base += cqe->res;
200204
data->iov.iov_len -= cqe->res;
@@ -221,6 +225,25 @@ static int copy_file(struct io_uring *ring, off_t insize)
221225
}
222226
}
223227

228+
/* wait out pending writes */
229+
while (writes) {
230+
struct io_data *data;
231+
232+
ret = io_uring_wait_cqe(ring, &cqe);
233+
if (ret) {
234+
fprintf(stderr, "wait_cqe=%d\n", ret);
235+
return 1;
236+
}
237+
if (cqe->res < 0) {
238+
fprintf(stderr, "write res=%d\n", cqe->res);
239+
return 1;
240+
}
241+
data = io_uring_cqe_get_data(cqe);
242+
free(data);
243+
writes--;
244+
io_uring_cqe_seen(ring, cqe);
245+
}
246+
224247
return 0;
225248
}
226249

0 commit comments

Comments
 (0)