Skip to content

Commit 06987da

Browse files
committed
Merge branch 'for-4.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue fix from Tejun Heo: "This is a fix for an old bug in workqueue. Workqueue used a mutex to arbitrate who gets to be the manager of a pool. When the manager role gets released, the mutex gets unlocked while holding the pool's irqsafe spinlock. This can lead to deadlocks as mutex's internal spinlock isn't irqsafe. This got discovered by recent fixes to mutex lockdep annotations. The fix is a bit invasive for rc6 but if anything were wrong with the fix it would likely have already blown up in -next, and we want the fix in -stable anyway" * 'for-4.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq: workqueue: replace pool->manager_arb mutex with a flag
2 parents 2f1b11c + 692b482 commit 06987da

File tree

1 file changed

+15
-22
lines changed

1 file changed

+15
-22
lines changed

kernel/workqueue.c

Lines changed: 15 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ enum {
6868
* attach_mutex to avoid changing binding state while
6969
* worker_attach_to_pool() is in progress.
7070
*/
71+
POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */
7172
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
7273

7374
/* worker flags */
@@ -165,7 +166,6 @@ struct worker_pool {
165166
/* L: hash of busy workers */
166167

167168
/* see manage_workers() for details on the two manager mutexes */
168-
struct mutex manager_arb; /* manager arbitration */
169169
struct worker *manager; /* L: purely informational */
170170
struct mutex attach_mutex; /* attach/detach exclusion */
171171
struct list_head workers; /* A: attached workers */
@@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
299299

300300
static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
301301
static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
302+
static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
302303

303304
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
304305
static bool workqueue_freezing; /* PL: have wqs started freezing? */
@@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool)
801802
/* Do we have too many workers and should some go away? */
802803
static bool too_many_workers(struct worker_pool *pool)
803804
{
804-
bool managing = mutex_is_locked(&pool->manager_arb);
805+
bool managing = pool->flags & POOL_MANAGER_ACTIVE;
805806
int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
806807
int nr_busy = pool->nr_workers - nr_idle;
807808

@@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker)
19801981
{
19811982
struct worker_pool *pool = worker->pool;
19821983

1983-
/*
1984-
* Anyone who successfully grabs manager_arb wins the arbitration
1985-
* and becomes the manager. mutex_trylock() on pool->manager_arb
1986-
* failure while holding pool->lock reliably indicates that someone
1987-
* else is managing the pool and the worker which failed trylock
1988-
* can proceed to executing work items. This means that anyone
1989-
* grabbing manager_arb is responsible for actually performing
1990-
* manager duties. If manager_arb is grabbed and released without
1991-
* actual management, the pool may stall indefinitely.
1992-
*/
1993-
if (!mutex_trylock(&pool->manager_arb))
1984+
if (pool->flags & POOL_MANAGER_ACTIVE)
19941985
return false;
1986+
1987+
pool->flags |= POOL_MANAGER_ACTIVE;
19951988
pool->manager = worker;
19961989

19971990
maybe_create_worker(pool);
19981991

19991992
pool->manager = NULL;
2000-
mutex_unlock(&pool->manager_arb);
1993+
pool->flags &= ~POOL_MANAGER_ACTIVE;
1994+
wake_up(&wq_manager_wait);
20011995
return true;
20021996
}
20031997

@@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool)
32483242
setup_timer(&pool->mayday_timer, pool_mayday_timeout,
32493243
(unsigned long)pool);
32503244

3251-
mutex_init(&pool->manager_arb);
32523245
mutex_init(&pool->attach_mutex);
32533246
INIT_LIST_HEAD(&pool->workers);
32543247

@@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool)
33183311
hash_del(&pool->hash_node);
33193312

33203313
/*
3321-
* Become the manager and destroy all workers. Grabbing
3322-
* manager_arb prevents @pool's workers from blocking on
3323-
* attach_mutex.
3314+
* Become the manager and destroy all workers. This prevents
3315+
* @pool's workers from blocking on attach_mutex. We're the last
3316+
* manager and @pool gets freed with the flag set.
33243317
*/
3325-
mutex_lock(&pool->manager_arb);
3326-
33273318
spin_lock_irq(&pool->lock);
3319+
wait_event_lock_irq(wq_manager_wait,
3320+
!(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
3321+
pool->flags |= POOL_MANAGER_ACTIVE;
3322+
33283323
while ((worker = first_idle_worker(pool)))
33293324
destroy_worker(worker);
33303325
WARN_ON(pool->nr_workers || pool->nr_idle);
@@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool)
33383333
if (pool->detach_completion)
33393334
wait_for_completion(pool->detach_completion);
33403335

3341-
mutex_unlock(&pool->manager_arb);
3342-
33433336
/* shut down the timers */
33443337
del_timer_sync(&pool->idle_timer);
33453338
del_timer_sync(&pool->mayday_timer);

0 commit comments

Comments (0)