Commit 7de9d4f

Author: Peter Zijlstra (committed)
sched: Start blocked_on chain processing in find_proxy_task()
Start to flesh out the real find_proxy_task() implementation, but avoid
the migration cases for now; in those cases just deactivate the donor
task and pick again.

To ensure the donor task or other blocked tasks in the chain aren't
migrated away while we're running the proxy, also tweak the fair class
logic to avoid migrating donor or mutex-blocked tasks.

[jstultz: This change was split out from the larger proxy patch]

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Juri Lelli <[email protected]>
Signed-off-by: Valentin Schneider <[email protected]>
Signed-off-by: Connor O'Brien <[email protected]>
Signed-off-by: John Stultz <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: K Prateek Nayak <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent be39617 commit 7de9d4f
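
For orientation before the diff: the heart of the change is a loop that
walks the blocked-on relation (task -> mutex -> owner -> task ...) until
it finds a runnable owner to run as the donor's proxy. Below is a minimal
sketch of that idea with all locking, migration, and race handling
stripped out. The helper names (task_is_blocked(), __mutex_owner(),
blocked_on) follow the real find_proxy_task() in the diff; the function
name chain_sketch() is made up for illustration, and this is not the
kernel code:

    /*
     * Simplified sketch: walk the blocked_on chain from the donor until
     * we reach a task that is not itself blocked on a mutex.
     */
    static struct task_struct *chain_sketch(struct task_struct *donor)
    {
            struct task_struct *p = donor;

            while (task_is_blocked(p)) {
                    struct mutex *m = p->blocked_on;       /* mutex p waits on */
                    struct task_struct *owner = __mutex_owner(m);

                    if (!owner)              /* mutex was just released  */
                            return p;        /* p can acquire it; run p  */
                    p = owner;               /* follow the chain         */
            }
            return p;                        /* runnable owner: run it as
                                                the donor's proxy        */
    }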

File tree

3 files changed: +135, -26 lines

kernel/locking/mutex.h

Lines changed: 2 additions & 1 deletion

@@ -6,7 +6,7 @@
  *
  * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <[email protected]>
  */
-
+#ifndef CONFIG_PREEMPT_RT
 /*
  * This is the control structure for tasks blocked on mutex, which resides
  * on the blocked task's kernel stack:
@@ -70,3 +70,4 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
 # define debug_mutex_unlock(lock)               do { } while (0)
 # define debug_mutex_init(lock, name, key)      do { } while (0)
 #endif /* !CONFIG_DEBUG_MUTEXES */
+#endif /* CONFIG_PREEMPT_RT */
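
Context for this hunk (not stated in the diff itself): kernel/sched/core.c
below starts including this locking-internal header, and under
CONFIG_PREEMPT_RT 'struct mutex' is rtmutex-based, so the debug-mutex
declarations must not be visible there. Schematically, the header now has
this shape (an illustrative outline, not the verbatim file):

    #ifndef CONFIG_PREEMPT_RT
    /* blocked-on control structure and debug_mutex_*() declarations,
     * with no-op stubs when CONFIG_DEBUG_MUTEXES is off ... */
    #endif /* CONFIG_PREEMPT_RT */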

kernel/sched/core.c

Lines changed: 123 additions & 23 deletions

@@ -96,6 +96,7 @@
 #include "../workqueue_internal.h"
 #include "../../io_uring/io-wq.h"
 #include "../smpboot.h"
+#include "../locking/mutex.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
@@ -2933,8 +2934,15 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
         struct set_affinity_pending my_pending = { }, *pending = NULL;
         bool stop_pending, complete = false;
 
-        /* Can the task run on the task's current CPU? If so, we're done */
-        if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
+        /*
+         * Can the task run on the task's current CPU? If so, we're done
+         *
+         * We are also done if the task is the current donor, boosting a lock-
+         * holding proxy, (and potentially has been migrated outside its
+         * current or previous affinity mask)
+         */
+        if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask) ||
+            (task_current_donor(rq, p) && !task_current(rq, p))) {
                 struct task_struct *push_task = NULL;
 
                 if ((flags & SCA_MIGRATE_ENABLE) &&
@@ -6573,11 +6581,12 @@ static bool try_to_block_task(struct rq *rq, struct task_struct *p,
 }
 
 #ifdef CONFIG_SCHED_PROXY_EXEC
-static inline void proxy_resched_idle(struct rq *rq)
+static inline struct task_struct *proxy_resched_idle(struct rq *rq)
 {
         put_prev_set_next_task(rq, rq->donor, rq->idle);
         rq_set_donor(rq, rq->idle);
         set_tsk_need_resched(rq->idle);
+        return rq->idle;
 }
 
 static bool __proxy_deactivate(struct rq *rq, struct task_struct *donor)
@@ -6614,36 +6623,124 @@ static struct task_struct *proxy_deactivate(struct rq *rq, struct task_struct *d
 }
 
 /*
- * Initial simple sketch that just deactivates the blocked task
- * chosen by pick_next_task() so we can then pick something that
- * isn't blocked.
+ * Find runnable lock owner to proxy for mutex blocked donor
+ *
+ * Follow the blocked-on relation:
+ *   task->blocked_on -> mutex->owner -> task...
+ *
+ * Lock order:
+ *
+ *   p->pi_lock
+ *     rq->lock
+ *       mutex->wait_lock
+ *
+ * Returns the task that is going to be used as execution context (the one
+ * that is actually going to be run on cpu_of(rq)).
 */
 static struct task_struct *
 find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
 {
+        struct task_struct *owner = NULL;
+        int this_cpu = cpu_of(rq);
+        struct task_struct *p;
         struct mutex *mutex;
 
-        mutex = donor->blocked_on;
-        /* Something changed in the chain, so pick again */
-        if (!mutex)
-                return NULL;
-        /*
-         * By taking mutex->wait_lock we hold off concurrent mutex_unlock()
-         * and ensure @owner sticks around.
-         */
-        guard(raw_spinlock)(&mutex->wait_lock);
+        /* Follow blocked_on chain. */
+        for (p = donor; task_is_blocked(p); p = owner) {
+                mutex = p->blocked_on;
+                /* Something changed in the chain, so pick again */
+                if (!mutex)
+                        return NULL;
+                /*
+                 * By taking mutex->wait_lock we hold off concurrent mutex_unlock()
+                 * and ensure @owner sticks around.
+                 */
+                guard(raw_spinlock)(&mutex->wait_lock);
 
-        /* Check again that donor is blocked with blocked_lock held */
-        if (!task_is_blocked(donor) || mutex != __get_task_blocked_on(donor)) {
+                /* Check again that p is blocked with wait_lock held */
+                if (mutex != __get_task_blocked_on(p)) {
+                        /*
+                         * Something changed in the blocked_on chain and
+                         * we don't know if only at this level. So, let's
+                         * just bail out completely and let __schedule()
+                         * figure things out (pick_again loop).
+                         */
+                        return NULL;
+                }
+
+                owner = __mutex_owner(mutex);
+                if (!owner) {
+                        __clear_task_blocked_on(p, mutex);
+                        return p;
+                }
+
+                if (!READ_ONCE(owner->on_rq) || owner->se.sched_delayed) {
+                        /* XXX Don't handle blocked owners/delayed dequeue yet */
+                        return proxy_deactivate(rq, donor);
+                }
+
+                if (task_cpu(owner) != this_cpu) {
+                        /* XXX Don't handle migrations yet */
+                        return proxy_deactivate(rq, donor);
+                }
+
+                if (task_on_rq_migrating(owner)) {
+                        /*
+                         * One of the chain of mutex owners is currently migrating to this
+                         * CPU, but has not yet been enqueued because we are holding the
+                         * rq lock. As a simple solution, just schedule rq->idle to give
+                         * the migration a chance to complete. Much like the migrate_task
+                         * case we should end up back in find_proxy_task(), this time
+                         * hopefully with all relevant tasks already enqueued.
+                         */
+                        return proxy_resched_idle(rq);
+                }
+
+                /*
+                 * Its possible to race where after we check owner->on_rq
+                 * but before we check (owner_cpu != this_cpu) that the
+                 * task on another cpu was migrated back to this cpu. In
+                 * that case it could slip by our checks. So double check
+                 * we are still on this cpu and not migrating. If we get
+                 * inconsistent results, try again.
+                 */
+                if (!task_on_rq_queued(owner) || task_cpu(owner) != this_cpu)
+                        return NULL;
+
+                if (owner == p) {
+                        /*
+                         * It's possible we interleave with mutex_unlock like:
+                         *
+                         *                            lock(&rq->lock);
+                         *                              find_proxy_task()
+                         *    mutex_unlock()
+                         *      lock(&wait_lock);
+                         *      donor(owner) = current->blocked_donor;
+                         *      unlock(&wait_lock);
+                         *
+                         *      wake_up_q();
+                         *        ...
+                         *          ttwu_runnable()
+                         *            __task_rq_lock()
+                         *                            lock(&wait_lock);
+                         *                            owner == p
+                         *
                         * Which leaves us to finish the ttwu_runnable() and make it go.
+                         *
+                         * So schedule rq->idle so that ttwu_runnable() can get the rq
+                         * lock and mark owner as running.
+                         */
+                        return proxy_resched_idle(rq);
+                }
                 /*
-                 * Something changed in the blocked_on chain and
-                 * we don't know if only at this level. So, let's
-                 * just bail out completely and let __schedule()
-                 * figure things out (pick_again loop).
+                 * OK, now we're absolutely sure @owner is on this
+                 * rq, therefore holding @rq->lock is sufficient to
+                 * guarantee its existence, as per ttwu_remote().
                 */
-                return NULL; /* do pick_next_task() again */
         }
-        return proxy_deactivate(rq, donor);
+
+        WARN_ON_ONCE(owner && !owner->on_rq);
+        return owner;
 }
 #else /* SCHED_PROXY_EXEC */
 static struct task_struct *
@@ -6801,10 +6898,13 @@ static void __sched notrace __schedule(int sched_mode)
                 next = find_proxy_task(rq, next, &rf);
                 if (!next)
                         goto pick_again;
+                if (next == rq->idle)
+                        goto keep_resched;
         }
 picked:
         clear_tsk_need_resched(prev);
         clear_preempt_need_resched();
+keep_resched:
         rq->last_seen_need_resched_ns = 0;
 
         is_switch = prev != next;
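
A note for readers following along: several helpers used above, such as
task_is_blocked(), __get_task_blocked_on(), __clear_task_blocked_on(),
task_current_donor() and rq_set_donor(), were introduced by earlier
patches in the proxy-execution series and are not part of this diff.
Roughly, as hedged approximations (the real definitions live elsewhere in
the series and may differ in detail):

    /* Approximation: a task is "blocked" for proxy purposes when proxy
     * execution is enabled and it is waiting on a mutex. */
    static inline bool task_is_blocked(struct task_struct *p)
    {
            if (!sched_proxy_exec())
                    return false;
            return !!p->blocked_on;
    }

    /* Approximation: is @p the task currently donating its scheduling
     * context on @rq (which may differ from the task actually running)? */
    static inline bool task_current_donor(struct rq *rq, struct task_struct *p)
    {
            return rq->donor == p;
    }

Note also the keep_resched path in __schedule(): when find_proxy_task()
returns rq->idle as a placeholder, need_resched is deliberately left set,
so the CPU re-enters the scheduler promptly once the migration or wakeup
it is waiting on completes.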

kernel/sched/fair.c

Lines changed: 10 additions & 2 deletions

@@ -9291,7 +9291,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
          * 2) throttled_lb_pair, or
          * 3) cannot be migrated to this CPU due to cpus_ptr, or
          * 4) running (obviously), or
-         * 5) are cache-hot on their current CPU.
+         * 5) are cache-hot on their current CPU, or
+         * 6) are blocked on mutexes (if SCHED_PROXY_EXEC is enabled)
          */
         if ((p->se.sched_delayed) && (env->migration_type != migrate_load))
                 return 0;
@@ -9313,6 +9314,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
         if (kthread_is_per_cpu(p))
                 return 0;
 
+        if (task_is_blocked(p))
+                return 0;
+
         if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
                 int cpu;
 
@@ -9348,7 +9352,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
         /* Record that we found at least one task that could run on dst_cpu */
         env->flags &= ~LBF_ALL_PINNED;
 
-        if (task_on_cpu(env->src_rq, p)) {
+        if (task_on_cpu(env->src_rq, p) ||
+            task_current_donor(env->src_rq, p)) {
                 schedstat_inc(p->stats.nr_failed_migrations_running);
                 return 0;
         }
@@ -9392,6 +9397,9 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
                 schedstat_inc(p->stats.nr_forced_migrations);
         }
 
+        WARN_ON(task_current(env->src_rq, p));
+        WARN_ON(task_current_donor(env->src_rq, p));
+
         deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
         set_task_cpu(p, env->dst_cpu);
 }
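
Taken together, the fair-class changes make the load balancer treat
mutex-blocked tasks and the current donor the same way it treats running
tasks. A hypothetical scenario (illustrative only) showing what the new
checks prevent:

    /*
     * CPU0: donor D is blocked on mutex M; owner O runs on D's behalf,
     *       using D's scheduling context.
     * CPU1: the load balancer scans CPU0 and sees D as "not running".
     *
     * Without the task_is_blocked() / task_current_donor() checks in
     * can_migrate_task(), CPU1 could detach D mid-proxy and pull the
     * execution context out from under O. The WARN_ON()s in
     * detach_task() assert that this invariant is never violated.
     */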
