Commit 8990929 (parent 33d031e)
sched_ext: Don't use double locking to migrate tasks across CPUs
consume_remote_task() and dispatch_to_local_dsq() use
move_task_to_local_dsq() to migrate the task to the target CPU. Currently,
move_task_to_local_dsq() expects the caller to lock both the source and
destination rq's. While this may save a few lock operations when the rq's
are not contended, under contention the double locking can exacerbate the
situation significantly (refer to the linked message below).

Update the migration path so that double locking is not used.
move_task_to_local_dsq() now expects the caller to hold only the source rq
lock; it drops that lock and then acquires the destination rq lock. The
code is simpler this way and, on a 2-way NUMA machine w/ Xeon Gold 6138,
`hackbench 100 thread 5000` shows ~3% improvement with scx_simple.

Signed-off-by: Tejun Heo <[email protected]>
Suggested-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Acked-by: David Vernet <[email protected]>
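To make the locking change concrete, here is a minimal userspace model of the new convention (a sketch only: toy_rq and migrate_task_locked() are invented names, pthread mutexes stand in for rq locks, and the real implementation is move_task_to_local_dsq() in the diff below). The helper is entered with the source lock held and returns with the destination lock held, so it never holds both locks at once, unlike the previous double_lock_balance() based path:

#include <pthread.h>
#include <stdio.h>

struct toy_rq {
        pthread_mutex_t lock;   /* stands in for the rq lock */
        int nr_tasks;
};

/* Called with @src->lock held; returns with @dst->lock held. */
static void migrate_task_locked(struct toy_rq *src, struct toy_rq *dst)
{
        src->nr_tasks--;                        /* "deactivate" on the source */
        pthread_mutex_unlock(&src->lock);       /* hand off: drop the source lock... */
        pthread_mutex_lock(&dst->lock);         /* ...then take the destination lock */
        dst->nr_tasks++;                        /* "activate" on the destination */
}

int main(void)
{
        struct toy_rq a = { PTHREAD_MUTEX_INITIALIZER, 1 };
        struct toy_rq b = { PTHREAD_MUTEX_INITIALIZER, 0 };

        pthread_mutex_lock(&a.lock);            /* caller locks the source first */
        migrate_task_locked(&a, &b);            /* returns with @b locked instead */
        pthread_mutex_unlock(&b.lock);

        printf("a=%d b=%d\n", a.nr_tasks, b.nr_tasks);  /* prints a=0 b=1 */
        return 0;
}

Compared with the removed dispatch_to_local_dsq_lock()/unlock() helpers, which had to distinguish four double-locking cases, the handoff needs no case analysis; the cost is an extra unlock/lock pair when the locks are uncontended, which the hackbench result in the commit message suggests is more than recovered under load.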
1 file changed: kernel/sched/ext.c (+46 lines, -88 lines)
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2097,121 +2097,76 @@ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
 #ifdef CONFIG_SMP
 /**
  * move_task_to_local_dsq - Move a task from a different rq to a local DSQ
- * @rq: rq to move the task into, currently locked
  * @p: task to move
  * @enq_flags: %SCX_ENQ_*
+ * @src_rq: rq to move the task from, locked on entry, released on return
+ * @dst_rq: rq to move the task into, locked on return
  *
- * Move @p which is currently on a different rq to @rq's local DSQ. The caller
+ * Move @p which is currently on @src_rq to @dst_rq's local DSQ. The caller
  * must:
  *
  * 1. Start with exclusive access to @p either through its DSQ lock or
  *    %SCX_OPSS_DISPATCHING flag.
  *
  * 2. Set @p->scx.holding_cpu to raw_smp_processor_id().
  *
- * 3. Remember task_rq(@p). Release the exclusive access so that we don't
- *    deadlock with dequeue.
+ * 3. Remember task_rq(@p) as @src_rq. Release the exclusive access so that we
+ *    don't deadlock with dequeue.
  *
- * 4. Lock @rq and the task_rq from #3.
+ * 4. Lock @src_rq from #3.
  *
  * 5. Call this function.
  *
  * Returns %true if @p was successfully moved. %false after racing dequeue and
- * losing.
+ * losing. On return, @src_rq is unlocked and @dst_rq is locked.
  */
-static bool move_task_to_local_dsq(struct rq *rq, struct task_struct *p,
-                                   u64 enq_flags)
+static bool move_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
+                                   struct rq *src_rq, struct rq *dst_rq)
 {
-        struct rq *task_rq;
-
-        lockdep_assert_rq_held(rq);
+        lockdep_assert_rq_held(src_rq);
 
         /*
-         * If dequeue got to @p while we were trying to lock both rq's, it'd
-         * have cleared @p->scx.holding_cpu to -1. While other cpus may have
-         * updated it to different values afterwards, as this operation can't be
+         * If dequeue got to @p while we were trying to lock @src_rq, it'd have
+         * cleared @p->scx.holding_cpu to -1. While other cpus may have updated
+         * it to different values afterwards, as this operation can't be
          * preempted or recurse, @p->scx.holding_cpu can never become
          * raw_smp_processor_id() again before we're done. Thus, we can tell
          * whether we lost to dequeue by testing whether @p->scx.holding_cpu is
         * still raw_smp_processor_id().
          *
+         * @p->rq couldn't have changed if we're still the holding cpu.
+         *
          * See dispatch_dequeue() for the counterpart.
          */
-        if (unlikely(p->scx.holding_cpu != raw_smp_processor_id()))
+        if (unlikely(p->scx.holding_cpu != raw_smp_processor_id()) ||
+            WARN_ON_ONCE(src_rq != task_rq(p))) {
+                raw_spin_rq_unlock(src_rq);
+                raw_spin_rq_lock(dst_rq);
                 return false;
+        }
 
-        /* @p->rq couldn't have changed if we're still the holding cpu */
-        task_rq = task_rq(p);
-        lockdep_assert_rq_held(task_rq);
+        /* the following marks @p MIGRATING which excludes dequeue */
+        deactivate_task(src_rq, p, 0);
+        set_task_cpu(p, cpu_of(dst_rq));
+        p->scx.sticky_cpu = cpu_of(dst_rq);
 
-        WARN_ON_ONCE(!cpumask_test_cpu(cpu_of(rq), p->cpus_ptr));
-        deactivate_task(task_rq, p, 0);
-        set_task_cpu(p, cpu_of(rq));
-        p->scx.sticky_cpu = cpu_of(rq);
+        raw_spin_rq_unlock(src_rq);
+        raw_spin_rq_lock(dst_rq);
 
         /*
          * We want to pass scx-specific enq_flags but activate_task() will
          * truncate the upper 32 bit. As we own @rq, we can pass them through
          * @rq->scx.extra_enq_flags instead.
          */
-        WARN_ON_ONCE(rq->scx.extra_enq_flags);
-        rq->scx.extra_enq_flags = enq_flags;
-        activate_task(rq, p, 0);
-        rq->scx.extra_enq_flags = 0;
+        WARN_ON_ONCE(!cpumask_test_cpu(cpu_of(dst_rq), p->cpus_ptr));
+        WARN_ON_ONCE(dst_rq->scx.extra_enq_flags);
+        dst_rq->scx.extra_enq_flags = enq_flags;
+        activate_task(dst_rq, p, 0);
+        dst_rq->scx.extra_enq_flags = 0;
 
         return true;
 }
 
-/**
- * dispatch_to_local_dsq_lock - Ensure source and destination rq's are locked
- * @rq: current rq which is locked
- * @src_rq: rq to move task from
- * @dst_rq: rq to move task to
- *
- * We're holding @rq lock and trying to dispatch a task from @src_rq to
- * @dst_rq's local DSQ and thus need to lock both @src_rq and @dst_rq. Whether
- * @rq stays locked isn't important as long as the state is restored after
- * dispatch_to_local_dsq_unlock().
- */
-static void dispatch_to_local_dsq_lock(struct rq *rq, struct rq *src_rq,
-                                       struct rq *dst_rq)
-{
-        if (src_rq == dst_rq) {
-                raw_spin_rq_unlock(rq);
-                raw_spin_rq_lock(dst_rq);
-        } else if (rq == src_rq) {
-                double_lock_balance(rq, dst_rq);
-        } else if (rq == dst_rq) {
-                double_lock_balance(rq, src_rq);
-        } else {
-                raw_spin_rq_unlock(rq);
-                double_rq_lock(src_rq, dst_rq);
-        }
-}
-
-/**
- * dispatch_to_local_dsq_unlock - Undo dispatch_to_local_dsq_lock()
- * @rq: current rq which is locked
- * @src_rq: rq to move task from
- * @dst_rq: rq to move task to
- *
- * Unlock @src_rq and @dst_rq and ensure that @rq is locked on return.
- */
-static void dispatch_to_local_dsq_unlock(struct rq *rq, struct rq *src_rq,
-                                         struct rq *dst_rq)
-{
-        if (src_rq == dst_rq) {
-                raw_spin_rq_unlock(dst_rq);
-                raw_spin_rq_lock(rq);
-        } else if (rq == src_rq) {
-                double_unlock_balance(rq, dst_rq);
-        } else if (rq == dst_rq) {
-                double_unlock_balance(rq, src_rq);
-        } else {
-                double_rq_unlock(src_rq, dst_rq);
-                raw_spin_rq_lock(rq);
-        }
-}
 #endif /* CONFIG_SMP */
 
 static void consume_local_task(struct rq *rq, struct scx_dispatch_q *dsq,
@@ -2263,8 +2218,6 @@ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq)
 static bool consume_remote_task(struct rq *rq, struct scx_dispatch_q *dsq,
                                 struct task_struct *p, struct rq *task_rq)
 {
-        bool moved = false;
-
         lockdep_assert_held(&dsq->lock);        /* released on return */
 
         /*
@@ -2280,13 +2233,10 @@ static bool consume_remote_task(struct rq *rq, struct scx_dispatch_q *dsq,
         p->scx.holding_cpu = raw_smp_processor_id();
         raw_spin_unlock(&dsq->lock);
 
-        double_lock_balance(rq, task_rq);
-
-        moved = move_task_to_local_dsq(rq, p, 0);
-
-        double_unlock_balance(rq, task_rq);
+        raw_spin_rq_unlock(rq);
+        raw_spin_rq_lock(task_rq);
 
-        return moved;
+        return move_task_to_local_dsq(p, 0, task_rq, rq);
 }
 #else   /* CONFIG_SMP */
 static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq) { return false; }
@@ -2380,7 +2330,6 @@ dispatch_to_local_dsq(struct rq *rq, u64 dsq_id, struct task_struct *p,
 
 #ifdef CONFIG_SMP
         if (cpumask_test_cpu(cpu_of(dst_rq), p->cpus_ptr)) {
-                struct rq *locked_dst_rq = dst_rq;
                 bool dsp;
 
                 /*
@@ -2399,7 +2348,11 @@ dispatch_to_local_dsq(struct rq *rq, u64 dsq_id, struct task_struct *p,
                 /* store_release ensures that dequeue sees the above */
                 atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE);
 
-                dispatch_to_local_dsq_lock(rq, src_rq, locked_dst_rq);
+                /* switch to @src_rq lock */
+                if (rq != src_rq) {
+                        raw_spin_rq_unlock(rq);
+                        raw_spin_rq_lock(src_rq);
+                }
 
                 /*
                  * We don't require the BPF scheduler to avoid dispatching to
@@ -2426,15 +2379,20 @@ dispatch_to_local_dsq(struct rq *rq, u64 dsq_id, struct task_struct *p,
                                                          enq_flags);
                         }
                 } else {
-                        dsp = move_task_to_local_dsq(dst_rq, p, enq_flags);
+                        dsp = move_task_to_local_dsq(p, enq_flags,
+                                                     src_rq, dst_rq);
                 }
 
                 /* if the destination CPU is idle, wake it up */
                 if (dsp && sched_class_above(p->sched_class,
                                              dst_rq->curr->sched_class))
                         resched_curr(dst_rq);
 
-                dispatch_to_local_dsq_unlock(rq, src_rq, locked_dst_rq);
+                /* switch back to @rq lock */
+                if (rq != dst_rq) {
+                        raw_spin_rq_unlock(dst_rq);
+                        raw_spin_rq_lock(rq);
+                }
 
                 return dsp ? DTL_DISPATCHED : DTL_LOST;
         }
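The kernel-doc above describes a five-step caller protocol (take exclusive access to the task, publish the claiming CPU in p->scx.holding_cpu, remember the source rq, lock it, then call the helper), with the helper re-checking holding_cpu to detect a racing dequeue. The sketch below is a rough userspace model of that handshake only; toy_task, toy_try_migrate() and toy_dequeue() are invented names, and the real logic lives in move_task_to_local_dsq() and dispatch_dequeue():

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_task {
        pthread_mutex_t lock;   /* stands in for the task's DSQ/rq lock */
        int holding_cpu;        /* -1 unless a migration has claimed the task */
};

/* Dequeue path: runs under @p->lock and cancels any in-flight migration. */
static void toy_dequeue(struct toy_task *p)
{
        pthread_mutex_lock(&p->lock);
        p->holding_cpu = -1;
        pthread_mutex_unlock(&p->lock);
}

/* Migration path: claim @p, drop the lock, re-lock and re-check the claim. */
static bool toy_try_migrate(struct toy_task *p, int my_cpu)
{
        bool won;

        pthread_mutex_lock(&p->lock);           /* step 1: exclusive access to @p */
        p->holding_cpu = my_cpu;                /* step 2: publish the claim */
        pthread_mutex_unlock(&p->lock);         /* step 3: release exclusive access */

        pthread_mutex_lock(&p->lock);           /* step 4: lock the source */
        won = (p->holding_cpu == my_cpu);       /* step 5: the helper's re-check */
        pthread_mutex_unlock(&p->lock);
        return won;
}

int main(void)
{
        struct toy_task t = { PTHREAD_MUTEX_INITIALIZER, -1 };

        toy_dequeue(&t);        /* with no migration in flight this is a no-op */
        printf("%s\n", toy_try_migrate(&t, 3) ? "migrated" : "lost to dequeue");

        /*
         * A toy_dequeue() on another thread between steps 3 and 4 would
         * clear holding_cpu and toy_try_migrate() would return false,
         * which is the case the kernel code handles by backing off.
         */
        return 0;
}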
