Commit 692b482

workqueue: replace pool->manager_arb mutex with a flag
Josef reported a HARDIRQ-safe -> HARDIRQ-unsafe lock order detected by lockdep:

[ 1270.472259] WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected
[ 1270.472783] 4.14.0-rc1-xfstests-12888-g76833e8 #110 Not tainted
[ 1270.473240] -----------------------------------------------------
[ 1270.473710] kworker/u5:2/5157 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
[ 1270.474239]  (&(&lock->wait_lock)->rlock){+.+.}, at: [<ffffffff8da253d2>] __mutex_unlock_slowpath+0xa2/0x280
[ 1270.474994]
[ 1270.474994] and this task is already holding:
[ 1270.475440]  (&pool->lock/1){-.-.}, at: [<ffffffff8d2992f6>] worker_thread+0x366/0x3c0
[ 1270.476046] which would create a new lock dependency:
[ 1270.476436]  (&pool->lock/1){-.-.} -> (&(&lock->wait_lock)->rlock){+.+.}
[ 1270.476949]
[ 1270.476949] but this new dependency connects a HARDIRQ-irq-safe lock:
[ 1270.477553]  (&pool->lock/1){-.-.}
...
[ 1270.488900] to a HARDIRQ-irq-unsafe lock:
[ 1270.489327]  (&(&lock->wait_lock)->rlock){+.+.}
...
[ 1270.494735]  Possible interrupt unsafe locking scenario:
[ 1270.494735]
[ 1270.495250]        CPU0                    CPU1
[ 1270.495600]        ----                    ----
[ 1270.495947]   lock(&(&lock->wait_lock)->rlock);
[ 1270.496295]                                local_irq_disable();
[ 1270.496753]                                lock(&pool->lock/1);
[ 1270.497205]                                lock(&(&lock->wait_lock)->rlock);
[ 1270.497744]   <Interrupt>
[ 1270.497948]     lock(&pool->lock/1);

This will cause an irq inversion deadlock if the above lock scenario happens.

The root cause of this safe -> unsafe lock order is the mutex_unlock(pool->manager_arb) in manage_workers() with pool->lock held.

Unlocking a mutex while holding an irq spinlock was never safe and this problem has been around forever, but it never got noticed because the mutex is usually only trylocked while holding the irq spinlock, which makes actual failures very unlikely, and the lockdep annotation missed the condition until the recent b9c16a0 ("locking/mutex: Fix lockdep_assert_held() fail").

Using a mutex for pool->manager_arb has always been a bit of a stretch. It is primarily a mechanism to arbitrate managership between workers, which can easily be done with a pool flag. The only reason it became a mutex is that the pool destruction path wants to exclude parallel managing operations.

This patch replaces the mutex with a new pool flag POOL_MANAGER_ACTIVE and makes the destruction path wait for the current manager on a wait queue.

v2: Drop unnecessary flag clearing before pool destruction as suggested by Boqun.

Signed-off-by: Tejun Heo <[email protected]>
Reported-by: Josef Bacik <[email protected]>
Reviewed-by: Lai Jiangshan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Boqun Feng <[email protected]>
Cc: [email protected]
1 parent 47684e1 commit 692b482
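
The new arbitration scheme boils down to a flag that is tested and set under pool->lock, plus a waitqueue that the destruction path sleeps on. As a rough userspace analogue (a sketch only: struct pool, manage() and destroy() and the pthread primitives below are illustrative stand-ins, not the kernel's actual types or API), the pattern looks like this:

#include <pthread.h>
#include <stdbool.h>

struct pool {
	pthread_mutex_t lock;           /* stands in for pool->lock */
	pthread_cond_t  manager_wait;   /* stands in for wq_manager_wait */
	bool            manager_active; /* stands in for POOL_MANAGER_ACTIVE */
};

/* Worker side: become the manager only if nobody else is managing. */
static bool manage(struct pool *p)
{
	pthread_mutex_lock(&p->lock);
	if (p->manager_active) {
		pthread_mutex_unlock(&p->lock);
		return false;                     /* someone else is the manager */
	}
	p->manager_active = true;
	pthread_mutex_unlock(&p->lock);

	/* ... perform manager duties (maybe_create_worker() in the kernel) ... */

	pthread_mutex_lock(&p->lock);
	p->manager_active = false;
	pthread_cond_broadcast(&p->manager_wait); /* wake_up(&wq_manager_wait) */
	pthread_mutex_unlock(&p->lock);
	return true;
}

/* Destruction side: wait out the current manager, then keep the flag set. */
static void destroy(struct pool *p)
{
	pthread_mutex_lock(&p->lock);
	while (p->manager_active)
		pthread_cond_wait(&p->manager_wait, &p->lock);
	p->manager_active = true;                 /* we are the last manager */
	/* ... destroy workers with the lock held ... */
	pthread_mutex_unlock(&p->lock);
}

The kernel version differs in that manage_workers() is entered with pool->lock already held, and put_unbound_pool() uses wait_event_lock_irq() so that pool->lock, an irq-safe spinlock, is only dropped around the sleep itself, as the diff below shows.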

File tree: 1 file changed (+15, -22 lines)


kernel/workqueue.c

Lines changed: 15 additions & 22 deletions
@@ -68,6 +68,7 @@ enum {
 	 * attach_mutex to avoid changing binding state while
 	 * worker_attach_to_pool() is in progress.
 	 */
+	POOL_MANAGER_ACTIVE	= 1 << 0,	/* being managed */
 	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
 
 	/* worker flags */
@@ -165,7 +166,6 @@ struct worker_pool {
 						/* L: hash of busy workers */
 
 	/* see manage_workers() for details on the two manager mutexes */
-	struct mutex		manager_arb;	/* manager arbitration */
 	struct worker		*manager;	/* L: purely informational */
 	struct mutex		attach_mutex;	/* attach/detach exclusion */
 	struct list_head	workers;	/* A: attached workers */
@@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
+static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
 
 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
@@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-	bool managing = mutex_is_locked(&pool->manager_arb);
+	bool managing = pool->flags & POOL_MANAGER_ACTIVE;
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
 
@@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker)
 {
 	struct worker_pool *pool = worker->pool;
 
-	/*
-	 * Anyone who successfully grabs manager_arb wins the arbitration
-	 * and becomes the manager.  mutex_trylock() on pool->manager_arb
-	 * failure while holding pool->lock reliably indicates that someone
-	 * else is managing the pool and the worker which failed trylock
-	 * can proceed to executing work items.  This means that anyone
-	 * grabbing manager_arb is responsible for actually performing
-	 * manager duties.  If manager_arb is grabbed and released without
-	 * actual management, the pool may stall indefinitely.
-	 */
-	if (!mutex_trylock(&pool->manager_arb))
+	if (pool->flags & POOL_MANAGER_ACTIVE)
 		return false;
+
+	pool->flags |= POOL_MANAGER_ACTIVE;
 	pool->manager = worker;
 
 	maybe_create_worker(pool);
 
 	pool->manager = NULL;
-	mutex_unlock(&pool->manager_arb);
+	pool->flags &= ~POOL_MANAGER_ACTIVE;
+	wake_up(&wq_manager_wait);
 	return true;
 }
 
@@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool)
 	setup_timer(&pool->mayday_timer, pool_mayday_timeout,
 		    (unsigned long)pool);
 
-	mutex_init(&pool->manager_arb);
 	mutex_init(&pool->attach_mutex);
 	INIT_LIST_HEAD(&pool->workers);
 
@@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool)
 	hash_del(&pool->hash_node);
 
 	/*
-	 * Become the manager and destroy all workers.  Grabbing
-	 * manager_arb prevents @pool's workers from blocking on
-	 * attach_mutex.
+	 * Become the manager and destroy all workers.  This prevents
+	 * @pool's workers from blocking on attach_mutex.  We're the last
+	 * manager and @pool gets freed with the flag set.
 	 */
-	mutex_lock(&pool->manager_arb);
-
 	spin_lock_irq(&pool->lock);
+	wait_event_lock_irq(wq_manager_wait,
+			    !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
+	pool->flags |= POOL_MANAGER_ACTIVE;
+
 	while ((worker = first_idle_worker(pool)))
 		destroy_worker(worker);
 	WARN_ON(pool->nr_workers || pool->nr_idle);
@@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool)
 	if (pool->detach_completion)
 		wait_for_completion(pool->detach_completion);
 
-	mutex_unlock(&pool->manager_arb);
-
 	/* shut down the timers */
 	del_timer_sync(&pool->idle_timer);
 	del_timer_sync(&pool->mayday_timer);
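
One subtlety in put_unbound_pool() above: pool->lock is an irq-safe spinlock, yet the function sleeps while waiting for the manager. wait_event_lock_irq() makes that legal by dropping the lock around each sleep and re-acquiring it before re-testing the condition; its behaviour here is roughly the following (an illustrative expansion, not the macro's actual definition):

	/*
	 * Roughly what wait_event_lock_irq(wq_manager_wait,
	 * !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock) does -- sketch only.
	 */
	DEFINE_WAIT(wait);

	while (pool->flags & POOL_MANAGER_ACTIVE) {
		prepare_to_wait(&wq_manager_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&pool->lock);	/* drop the irq-safe lock before sleeping */
		schedule();			/* woken by wake_up(&wq_manager_wait) */
		spin_lock_irq(&pool->lock);	/* re-acquire and re-test the condition */
	}
	finish_wait(&wq_manager_wait, &wait);

On the wake side, wake_up() takes the waitqueue lock with interrupts disabled, so calling it from manage_workers() while holding pool->lock does not recreate the HARDIRQ-safe -> HARDIRQ-unsafe dependency that mutex_unlock() introduced.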

0 commit comments
