Skip to content

Commit 9e138a4

Browse files
isilenceaxboe
authored andcommitted
io_uring: fix concurrent parking
If io_sq_thread_park() of one task got rescheduled right after set_bit(), before it gets back to mutex_lock() there can happen park()/unpark() by another task with SQPOLL locking again and continuing running never seeing that first set_bit(SHOULD_PARK), so won't even try to put the mutex down for parking. It will get parked eventually when SQPOLL drops the lock for reschedule, but may be problematic and will get in the way of further fixes. Account number of tasks waiting for parking with a new atomic variable park_pending and adjust SHOULD_PARK accordingly. It doesn't entirely replaces SHOULD_PARK bit with this atomic var because it's convenient to have it as a bit in the state and will help to do optimisations later. Signed-off-by: Pavel Begunkov <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent f6d5425 commit 9e138a4

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

fs/io_uring.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ enum {
258258

259259
struct io_sq_data {
260260
refcount_t refs;
261+
atomic_t park_pending;
261262
struct mutex lock;
262263

263264
/* ctx's that are using this sqd */
@@ -7067,7 +7068,13 @@ static void io_sq_thread_unpark(struct io_sq_data *sqd)
70677068
{
70687069
WARN_ON_ONCE(sqd->thread == current);
70697070

7071+
/*
7072+
* Do the dance but not conditional clear_bit() because it'd race with
7073+
* other threads incrementing park_pending and setting the bit.
7074+
*/
70707075
clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
7076+
if (atomic_dec_return(&sqd->park_pending))
7077+
set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
70717078
mutex_unlock(&sqd->lock);
70727079
}
70737080

@@ -7076,10 +7083,9 @@ static void io_sq_thread_park(struct io_sq_data *sqd)
70767083
{
70777084
WARN_ON_ONCE(sqd->thread == current);
70787085

7086+
atomic_inc(&sqd->park_pending);
70797087
set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
70807088
mutex_lock(&sqd->lock);
7081-
/* set again for consistency, in case concurrent parks are happening */
7082-
set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
70837089
if (sqd->thread)
70847090
wake_up_process(sqd->thread);
70857091
}
@@ -7099,6 +7105,8 @@ static void io_sq_thread_stop(struct io_sq_data *sqd)
70997105
static void io_put_sq_data(struct io_sq_data *sqd)
71007106
{
71017107
if (refcount_dec_and_test(&sqd->refs)) {
7108+
WARN_ON_ONCE(atomic_read(&sqd->park_pending));
7109+
71027110
io_sq_thread_stop(sqd);
71037111
kfree(sqd);
71047112
}
@@ -7172,6 +7180,7 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
71727180
if (!sqd)
71737181
return ERR_PTR(-ENOMEM);
71747182

7183+
atomic_set(&sqd->park_pending, 0);
71757184
refcount_set(&sqd->refs, 1);
71767185
INIT_LIST_HEAD(&sqd->ctx_list);
71777186
mutex_init(&sqd->lock);

0 commit comments

Comments
 (0)