Commit dfa4ed2

arighi authored and htejun committed
sched_ext: Introduce LLC awareness to the default idle selection policy
Rely on the scheduler topology information to implement basic LLC
awareness in the sched_ext built-in idle selection policy.

This allows schedulers using the built-in policy to make more informed
decisions when selecting an idle CPU in systems with multiple LLCs,
such as NUMA systems or chiplet-based architectures, and it helps keep
tasks within the same LLC domain, thereby improving cache locality.

For efficiency, LLC awareness is applied only to tasks that can run on
all the CPUs in the system for now. If a task's affinity is modified
from user space, it's the responsibility of user space to choose the
appropriate optimized scheduling domain.

Signed-off-by: Andrea Righi <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
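For context, here is a minimal, hypothetical sketch (not part of this commit) of how a sched_ext BPF scheduler delegates idle-CPU selection to the built-in policy and thereby picks up the new LLC-aware behavior. scx_bpf_select_cpu_dfl() and scx_bpf_dispatch() are the kfuncs sched_ext exposed for this pattern at the time of this commit; the ops name and surrounding boilerplate are illustrative only.

/*
 * Hypothetical ops.select_cpu() implementation: defer to the kernel's
 * built-in policy (scx_select_cpu_dfl() below) via the
 * scx_bpf_select_cpu_dfl() kfunc. If an idle CPU was found, dispatch
 * the task directly to that CPU's local DSQ so it can run immediately.
 */
s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	bool is_idle = false;
	s32 cpu;

	cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
	if (is_idle)
		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);

	return cpu;
}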
1 parent b452ae4 · commit dfa4ed2

1 file changed:

kernel/sched/ext.c

Lines changed: 60 additions & 0 deletions
@@ -3124,9 +3124,39 @@ static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, u64 flags)
 		goto retry;
 	}
 
+#ifdef CONFIG_SCHED_MC
+/*
+ * Return the cpumask of CPUs usable by task @p in the same LLC domain of @cpu,
+ * or NULL if the LLC domain cannot be determined.
+ */
+static const struct cpumask *llc_domain(const struct task_struct *p, s32 cpu)
+{
+	struct sched_domain *sd = rcu_dereference(per_cpu(sd_llc, cpu));
+	const struct cpumask *llc_cpus = sd ? sched_domain_span(sd) : NULL;
+
+	/*
+	 * Return the LLC domain only if the task is allowed to run on all
+	 * CPUs.
+	 */
+	return p->nr_cpus_allowed == nr_cpu_ids ? llc_cpus : NULL;
+}
+#else /* CONFIG_SCHED_MC */
+static inline const struct cpumask *llc_domain(struct task_struct *p, s32 cpu)
+{
+	return NULL;
+}
+#endif /* CONFIG_SCHED_MC */
+
+/*
+ * Built-in cpu idle selection policy.
+ *
+ * NOTE: tasks that can only run on 1 CPU are excluded by this logic, because
+ * we never call ops.select_cpu() for them, see select_task_rq().
+ */
 static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
 			      u64 wake_flags, bool *found)
 {
+	const struct cpumask *llc_cpus = llc_domain(p, prev_cpu);
 	s32 cpu;
 
 	*found = false;
@@ -3178,22 +3208,52 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
 	 * partially idle @prev_cpu.
 	 */
 	if (sched_smt_active()) {
+		/*
+		 * Keep using @prev_cpu if it's part of a fully idle core.
+		 */
 		if (cpumask_test_cpu(prev_cpu, idle_masks.smt) &&
 		    test_and_clear_cpu_idle(prev_cpu)) {
 			cpu = prev_cpu;
 			goto cpu_found;
 		}
 
+		/*
+		 * Search for any fully idle core in the same LLC domain.
+		 */
+		if (llc_cpus) {
+			cpu = scx_pick_idle_cpu(llc_cpus, SCX_PICK_IDLE_CORE);
+			if (cpu >= 0)
+				goto cpu_found;
+		}
+
+		/*
+		 * Search for any fully idle core usable by the task.
+		 */
 		cpu = scx_pick_idle_cpu(p->cpus_ptr, SCX_PICK_IDLE_CORE);
 		if (cpu >= 0)
 			goto cpu_found;
 	}
 
+	/*
+	 * Use @prev_cpu if it's idle.
+	 */
 	if (test_and_clear_cpu_idle(prev_cpu)) {
 		cpu = prev_cpu;
 		goto cpu_found;
 	}
 
+	/*
+	 * Search for any idle CPU in the same LLC domain.
+	 */
+	if (llc_cpus) {
+		cpu = scx_pick_idle_cpu(llc_cpus, 0);
+		if (cpu >= 0)
+			goto cpu_found;
+	}
+
+	/*
+	 * Search for any idle CPU usable by the task.
+	 */
 	cpu = scx_pick_idle_cpu(p->cpus_ptr, 0);
 	if (cpu >= 0)
 		goto cpu_found;
