Skip to content

Commit 7271ef3

Browse files
committed
io_uring: fix recursive completion locking on overflow flush
syzbot reports a scenario where we recurse on the completion lock when flushing an overflow: 1 lock held by syz-executor287/6816: #0: ffff888093cdb4d8 (&ctx->completion_lock){....}-{2:2}, at: io_cqring_overflow_flush+0xc6/0xab0 fs/io_uring.c:1333 stack backtrace: CPU: 1 PID: 6816 Comm: syz-executor287 Not tainted 5.8.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1f0/0x31e lib/dump_stack.c:118 print_deadlock_bug kernel/locking/lockdep.c:2391 [inline] check_deadlock kernel/locking/lockdep.c:2432 [inline] validate_chain+0x69a4/0x88a0 kernel/locking/lockdep.c:3202 __lock_acquire+0x1161/0x2ab0 kernel/locking/lockdep.c:4426 lock_acquire+0x160/0x730 kernel/locking/lockdep.c:5005 __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline] _raw_spin_lock_irq+0x67/0x80 kernel/locking/spinlock.c:167 spin_lock_irq include/linux/spinlock.h:379 [inline] io_queue_linked_timeout fs/io_uring.c:5928 [inline] __io_queue_async_work fs/io_uring.c:1192 [inline] __io_queue_deferred+0x36a/0x790 fs/io_uring.c:1237 io_cqring_overflow_flush+0x774/0xab0 fs/io_uring.c:1359 io_ring_ctx_wait_and_kill+0x2a1/0x570 fs/io_uring.c:7808 io_uring_release+0x59/0x70 fs/io_uring.c:7829 __fput+0x34f/0x7b0 fs/file_table.c:281 task_work_run+0x137/0x1c0 kernel/task_work.c:135 exit_task_work include/linux/task_work.h:25 [inline] do_exit+0x5f3/0x1f20 kernel/exit.c:806 do_group_exit+0x161/0x2d0 kernel/exit.c:903 __do_sys_exit_group+0x13/0x20 kernel/exit.c:914 __se_sys_exit_group+0x10/0x10 kernel/exit.c:912 __x64_sys_exit_group+0x37/0x40 kernel/exit.c:912 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this by passing back the link from __io_queue_async_work(), and then let the caller handle the queueing of the link. 
Take care to also punt the submission reference put to the caller, as we're holding the completion lock for the __io_queue_deferred() case. Hence we need to mark the io_kiocb appropriately for that case. Reported-by: [email protected] Signed-off-by: Jens Axboe <[email protected]>
1 parent 0ba9c9e commit 7271ef3

File tree

1 file changed

+26
-10
lines changed

1 file changed

+26
-10
lines changed

fs/io_uring.c

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,7 @@ static void io_put_req(struct io_kiocb *req);
898898
static void io_double_put_req(struct io_kiocb *req);
899899
static void __io_double_put_req(struct io_kiocb *req);
900900
static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
901+
static void __io_queue_linked_timeout(struct io_kiocb *req);
901902
static void io_queue_linked_timeout(struct io_kiocb *req);
902903
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
903904
struct io_uring_files_update *ip,
@@ -1179,24 +1180,27 @@ static void io_prep_async_link(struct io_kiocb *req)
11791180
io_prep_async_work(cur);
11801181
}
11811182

1182-
static void __io_queue_async_work(struct io_kiocb *req)
1183+
static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req)
11831184
{
11841185
struct io_ring_ctx *ctx = req->ctx;
11851186
struct io_kiocb *link = io_prep_linked_timeout(req);
11861187

11871188
trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
11881189
&req->work, req->flags);
11891190
io_wq_enqueue(ctx->io_wq, &req->work);
1190-
1191-
if (link)
1192-
io_queue_linked_timeout(link);
1191+
return link;
11931192
}
11941193

11951194
static void io_queue_async_work(struct io_kiocb *req)
11961195
{
1196+
struct io_kiocb *link;
1197+
11971198
/* init ->work of the whole link before punting */
11981199
io_prep_async_link(req);
1199-
__io_queue_async_work(req);
1200+
link = __io_queue_async_work(req);
1201+
1202+
if (link)
1203+
io_queue_linked_timeout(link);
12001204
}
12011205

12021206
static void io_kill_timeout(struct io_kiocb *req)
@@ -1229,12 +1233,19 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
12291233
do {
12301234
struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
12311235
struct io_defer_entry, list);
1236+
struct io_kiocb *link;
12321237

12331238
if (req_need_defer(de->req, de->seq))
12341239
break;
12351240
list_del_init(&de->list);
12361241
/* punt-init is done before queueing for defer */
1237-
__io_queue_async_work(de->req);
1242+
link = __io_queue_async_work(de->req);
1243+
if (link) {
1244+
__io_queue_linked_timeout(link);
1245+
/* drop submission reference */
1246+
link->flags |= REQ_F_COMP_LOCKED;
1247+
io_put_req(link);
1248+
}
12381249
kfree(de);
12391250
} while (!list_empty(&ctx->defer_list));
12401251
}
@@ -5939,22 +5950,27 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
59395950
return HRTIMER_NORESTART;
59405951
}
59415952

5942-
static void io_queue_linked_timeout(struct io_kiocb *req)
5953+
static void __io_queue_linked_timeout(struct io_kiocb *req)
59435954
{
5944-
struct io_ring_ctx *ctx = req->ctx;
5945-
59465955
/*
59475956
* If the list is now empty, then our linked request finished before
59485957
* we got a chance to setup the timer
59495958
*/
5950-
spin_lock_irq(&ctx->completion_lock);
59515959
if (!list_empty(&req->link_list)) {
59525960
struct io_timeout_data *data = &req->io->timeout;
59535961

59545962
data->timer.function = io_link_timeout_fn;
59555963
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
59565964
data->mode);
59575965
}
5966+
}
5967+
5968+
static void io_queue_linked_timeout(struct io_kiocb *req)
5969+
{
5970+
struct io_ring_ctx *ctx = req->ctx;
5971+
5972+
spin_lock_irq(&ctx->completion_lock);
5973+
__io_queue_linked_timeout(req);
59585974
spin_unlock_irq(&ctx->completion_lock);
59595975

59605976
/* drop submission reference */

0 commit comments

Comments
 (0)