
Commit 894d1b3

Authored and committed by Peter Zijlstra
locking/mutex: Remove wakeups from under mutex::wait_lock
In preparation to nest mutex::wait_lock under rq::lock we need to remove
wakeups from under it. Do this by utilizing wake_qs to defer the wakeup
until after the lock is dropped.

[Heavily changed after 55f036c ("locking: WW mutex cleanup") and
 08295b3 ("locking: Implement an algorithm choice for Wound-Wait mutexes")]
[jstultz: rebased to mainline, added extra wake_up_q & init to avoid hangs,
 similar to Connor's rework of this patch]

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Juri Lelli <[email protected]>
Signed-off-by: John Stultz <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Metin Kaya <[email protected]>
Acked-by: Davidlohr Bueso <[email protected]>
Tested-by: K Prateek Nayak <[email protected]>
Tested-by: Metin Kaya <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 7e019dc commit 894d1b3
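
The diffs below repeat one small pattern, sketched here for orientation. The function demo_unlock_slowpath and its two parameters are hypothetical; DEFINE_WAKE_Q(), wake_q_add(), wake_up_q() and the preempt_disable()/preempt_enable() pair are the real kernel helpers the patch relies on. The idea: queue wakeups on a wake_q while wait_lock is held, and issue them only after it has been dropped.

/*
 * Sketch only, not part of the commit: defer wakeups with a wake_q so
 * they are issued after wait_lock is dropped, with preemption disabled
 * across the unlock+wakeup window so the woken task cannot run before
 * we are done.
 */
#include <linux/sched.h>
#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>

static void demo_unlock_slowpath(raw_spinlock_t *wait_lock,
                                 struct task_struct *waiter)
{
        DEFINE_WAKE_Q(wake_q);

        raw_spin_lock(wait_lock);
        /* ... update lock state, pick the task to hand the lock to ... */
        wake_q_add(&wake_q, waiter);    /* queue the wakeup, don't issue it yet */

        preempt_disable();
        raw_spin_unlock(wait_lock);     /* wait_lock is no longer held */
        wake_up_q(&wake_q);             /* now the deferred wakeup happens */
        preempt_enable();
}

In __mutex_lock_common() and rtlock_slowlock_locked() the same idea also appears mid-loop: pending wakeups are flushed with wake_up_q() and the queue re-armed with wake_q_init() before the task schedules, so it never sleeps with wakeups still queued.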

9 files changed: 96 additions, 39 deletions

kernel/futex/pi.c

Lines changed: 5 additions & 1 deletion
@@ -922,6 +922,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
         struct rt_mutex_waiter rt_waiter;
         struct futex_hash_bucket *hb;
         struct futex_q q = futex_q_init;
+        DEFINE_WAKE_Q(wake_q);
         int res, ret;
 
         if (!IS_ENABLED(CONFIG_FUTEX_PI))
@@ -1018,8 +1019,11 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
          * such that futex_unlock_pi() is guaranteed to observe the waiter when
          * it sees the futex_q::pi_state.
          */
-        ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
+        ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q);
+        preempt_disable();
         raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
+        wake_up_q(&wake_q);
+        preempt_enable();
 
         if (ret) {
                 if (ret == 1)

kernel/locking/mutex.c

Lines changed: 12 additions & 4 deletions
@@ -575,6 +575,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
                     struct lockdep_map *nest_lock, unsigned long ip,
                     struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
 {
+        DEFINE_WAKE_Q(wake_q);
         struct mutex_waiter waiter;
         struct ww_mutex *ww;
         int ret;
@@ -625,7 +626,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
          */
         if (__mutex_trylock(lock)) {
                 if (ww_ctx)
-                        __ww_mutex_check_waiters(lock, ww_ctx);
+                        __ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
 
                 goto skip_wait;
         }
@@ -645,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
                  * Add in stamp order, waking up waiters that must kill
                  * themselves.
                  */
-                ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
+                ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx, &wake_q);
                 if (ret)
                         goto err_early_kill;
         }
@@ -681,6 +682,10 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
                 }
 
                 raw_spin_unlock(&lock->wait_lock);
+                /* Make sure we do wakeups before calling schedule */
+                wake_up_q(&wake_q);
+                wake_q_init(&wake_q);
+
                 schedule_preempt_disabled();
 
                 first = __mutex_waiter_is_first(lock, &waiter);
@@ -714,7 +719,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
                  */
                 if (!ww_ctx->is_wait_die &&
                     !__mutex_waiter_is_first(lock, &waiter))
-                        __ww_mutex_check_waiters(lock, ww_ctx);
+                        __ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
         }
 
         __mutex_remove_waiter(lock, &waiter);
@@ -730,6 +735,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
                 ww_mutex_lock_acquired(ww, ww_ctx);
 
         raw_spin_unlock(&lock->wait_lock);
+        wake_up_q(&wake_q);
         preempt_enable();
         return 0;
 
@@ -741,6 +747,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
         raw_spin_unlock(&lock->wait_lock);
         debug_mutex_free_waiter(&waiter);
         mutex_release(&lock->dep_map, ip);
+        wake_up_q(&wake_q);
         preempt_enable();
         return ret;
 }
@@ -951,9 +958,10 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
         if (owner & MUTEX_FLAG_HANDOFF)
                 __mutex_handoff(lock, next);
 
+        preempt_disable();
         raw_spin_unlock(&lock->wait_lock);
-
         wake_up_q(&wake_q);
+        preempt_enable();
 }
 
 #ifndef CONFIG_DEBUG_LOCK_ALLOC

kernel/locking/rtmutex.c

Lines changed: 37 additions & 14 deletions
@@ -34,13 +34,15 @@
 
 static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
                                         struct rt_mutex *lock,
-                                        struct ww_acquire_ctx *ww_ctx)
+                                        struct ww_acquire_ctx *ww_ctx,
+                                        struct wake_q_head *wake_q)
 {
         return 0;
 }
 
 static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
-                                            struct ww_acquire_ctx *ww_ctx)
+                                            struct ww_acquire_ctx *ww_ctx,
+                                            struct wake_q_head *wake_q)
 {
 }
 
@@ -1201,7 +1203,8 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
                                            struct rt_mutex_waiter *waiter,
                                            struct task_struct *task,
                                            struct ww_acquire_ctx *ww_ctx,
-                                           enum rtmutex_chainwalk chwalk)
+                                           enum rtmutex_chainwalk chwalk,
+                                           struct wake_q_head *wake_q)
 {
         struct task_struct *owner = rt_mutex_owner(lock);
         struct rt_mutex_waiter *top_waiter = waiter;
@@ -1245,7 +1248,10 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
 
         /* Check whether the waiter should back out immediately */
         rtm = container_of(lock, struct rt_mutex, rtmutex);
-        res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
+        preempt_disable();
+        res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q);
+        wake_up_q(wake_q);
+        preempt_enable();
         if (res) {
                 raw_spin_lock(&task->pi_lock);
                 rt_mutex_dequeue(lock, waiter);
@@ -1674,12 +1680,14 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
  * @state:	The task state for sleeping
  * @chwalk:	Indicator whether full or partial chainwalk is requested
  * @waiter:	Initializer waiter for blocking
+ * @wake_q:	The wake_q to wake tasks after we release the wait_lock
  */
 static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
                                        struct ww_acquire_ctx *ww_ctx,
                                        unsigned int state,
                                        enum rtmutex_chainwalk chwalk,
-                                       struct rt_mutex_waiter *waiter)
+                                       struct rt_mutex_waiter *waiter,
+                                       struct wake_q_head *wake_q)
 {
         struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
         struct ww_mutex *ww = ww_container_of(rtm);
@@ -1690,7 +1698,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
         /* Try to acquire the lock again: */
         if (try_to_take_rt_mutex(lock, current, NULL)) {
                 if (build_ww_mutex() && ww_ctx) {
-                        __ww_mutex_check_waiters(rtm, ww_ctx);
+                        __ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
                         ww_mutex_lock_acquired(ww, ww_ctx);
                 }
                 return 0;
@@ -1700,15 +1708,15 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 
         trace_contention_begin(lock, LCB_F_RT);
 
-        ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
+        ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk, wake_q);
         if (likely(!ret))
                 ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
 
         if (likely(!ret)) {
                 /* acquired the lock */
                 if (build_ww_mutex() && ww_ctx) {
                         if (!ww_ctx->is_wait_die)
-                                __ww_mutex_check_waiters(rtm, ww_ctx);
+                                __ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
                         ww_mutex_lock_acquired(ww, ww_ctx);
                 }
         } else {
@@ -1730,7 +1738,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 
 static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
                                              struct ww_acquire_ctx *ww_ctx,
-                                             unsigned int state)
+                                             unsigned int state,
+                                             struct wake_q_head *wake_q)
 {
         struct rt_mutex_waiter waiter;
         int ret;
@@ -1739,7 +1748,7 @@ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
         waiter.ww_ctx = ww_ctx;
 
         ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
-                                  &waiter);
+                                  &waiter, wake_q);
 
         debug_rt_mutex_free_waiter(&waiter);
         return ret;
@@ -1755,6 +1764,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
                                      struct ww_acquire_ctx *ww_ctx,
                                      unsigned int state)
 {
+        DEFINE_WAKE_Q(wake_q);
         unsigned long flags;
         int ret;
 
@@ -1776,8 +1786,11 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
          * irqsave/restore variants.
          */
         raw_spin_lock_irqsave(&lock->wait_lock, flags);
-        ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
+        ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state, &wake_q);
+        preempt_disable();
         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+        wake_up_q(&wake_q);
+        preempt_enable();
         rt_mutex_post_schedule();
 
         return ret;
@@ -1803,8 +1816,10 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
 /**
  * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
  * @lock:	The underlying RT mutex
+ * @wake_q:	The wake_q to wake tasks after we release the wait_lock
  */
-static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
+static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock,
+                                           struct wake_q_head *wake_q)
 {
         struct rt_mutex_waiter waiter;
         struct task_struct *owner;
@@ -1821,7 +1836,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 
         trace_contention_begin(lock, LCB_F_RT);
 
-        task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
+        task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK, wake_q);
 
         for (;;) {
                 /* Try to acquire the lock again */
@@ -1832,7 +1847,11 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
                         owner = rt_mutex_owner(lock);
                 else
                         owner = NULL;
+                preempt_disable();
                 raw_spin_unlock_irq(&lock->wait_lock);
+                wake_up_q(wake_q);
+                wake_q_init(wake_q);
+                preempt_enable();
 
                 if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
                         schedule_rtlock();
@@ -1857,10 +1876,14 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
 {
         unsigned long flags;
+        DEFINE_WAKE_Q(wake_q);
 
         raw_spin_lock_irqsave(&lock->wait_lock, flags);
-        rtlock_slowlock_locked(lock);
+        rtlock_slowlock_locked(lock, &wake_q);
+        preempt_disable();
         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+        wake_up_q(&wake_q);
+        preempt_enable();
 }
 
 #endif /* RT_MUTEX_BUILD_SPINLOCKS */

kernel/locking/rtmutex_api.c

Lines changed: 9 additions & 3 deletions
@@ -275,6 +275,7 @@ void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
  * @lock:	the rt_mutex to take
  * @waiter:	the pre-initialized rt_mutex_waiter
  * @task:	the task to prepare
+ * @wake_q:	the wake_q to wake tasks after we release the wait_lock
  *
  * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
  * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
@@ -291,7 +292,8 @@ void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
  */
 int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
                                         struct rt_mutex_waiter *waiter,
-                                        struct task_struct *task)
+                                        struct task_struct *task,
+                                        struct wake_q_head *wake_q)
 {
         int ret;
 
@@ -302,7 +304,7 @@ int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
 
         /* We enforce deadlock detection for futexes */
         ret = task_blocks_on_rt_mutex(lock, waiter, task, NULL,
-                                      RT_MUTEX_FULL_CHAINWALK);
+                                      RT_MUTEX_FULL_CHAINWALK, wake_q);
 
         if (ret && !rt_mutex_owner(lock)) {
                 /*
@@ -341,12 +343,16 @@ int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
                                       struct task_struct *task)
 {
         int ret;
+        DEFINE_WAKE_Q(wake_q);
 
         raw_spin_lock_irq(&lock->wait_lock);
-        ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
+        ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q);
         if (unlikely(ret))
                 remove_waiter(lock, waiter);
+        preempt_disable();
         raw_spin_unlock_irq(&lock->wait_lock);
+        wake_up_q(&wake_q);
+        preempt_enable();
 
         return ret;
 }

kernel/locking/rtmutex_common.h

Lines changed: 2 additions & 1 deletion
@@ -83,7 +83,8 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
 extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock);
 extern int __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
                                        struct rt_mutex_waiter *waiter,
-                                       struct task_struct *task);
+                                       struct task_struct *task,
+                                       struct wake_q_head *);
 extern int rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
                                      struct rt_mutex_waiter *waiter,
                                      struct task_struct *task);

kernel/locking/rwbase_rt.c

Lines changed: 7 additions & 1 deletion
@@ -69,6 +69,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
                                       unsigned int state)
 {
         struct rt_mutex_base *rtm = &rwb->rtmutex;
+        DEFINE_WAKE_Q(wake_q);
         int ret;
 
         rwbase_pre_schedule();
@@ -110,7 +111,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
          * For rwlocks this returns 0 unconditionally, so the below
          * !ret conditionals are optimized out.
          */
-        ret = rwbase_rtmutex_slowlock_locked(rtm, state);
+        ret = rwbase_rtmutex_slowlock_locked(rtm, state, &wake_q);
 
         /*
          * On success the rtmutex is held, so there can't be a writer
@@ -121,7 +122,12 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
          */
         if (!ret)
                 atomic_inc(&rwb->readers);
+
+        preempt_disable();
         raw_spin_unlock_irq(&rtm->wait_lock);
+        wake_up_q(&wake_q);
+        preempt_enable();
+
         if (!ret)
                 rwbase_rtmutex_unlock(rtm);
 

kernel/locking/rwsem.c

Lines changed: 2 additions & 2 deletions
@@ -1413,8 +1413,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
 #define rwbase_rtmutex_lock_state(rtm, state)		\
         __rt_mutex_lock(rtm, state)
 
-#define rwbase_rtmutex_slowlock_locked(rtm, state)	\
-        __rt_mutex_slowlock_locked(rtm, NULL, state)
+#define rwbase_rtmutex_slowlock_locked(rtm, state, wq)	\
+        __rt_mutex_slowlock_locked(rtm, NULL, state, wq)
 
 #define rwbase_rtmutex_unlock(rtm)			\
         __rt_mutex_unlock(rtm)

kernel/locking/spinlock_rt.c

Lines changed: 3 additions & 2 deletions
@@ -162,9 +162,10 @@ rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state)
 }
 
 static __always_inline int
-rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state)
+rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state,
+                               struct wake_q_head *wake_q)
 {
-        rtlock_slowlock_locked(rtm);
+        rtlock_slowlock_locked(rtm, wake_q);
         return 0;
 }
 
