Skip to content

Commit 4572541

Browse files
arighi authored and htejun committed
sched_ext: Use the NUMA scheduling domain for NUMA optimizations
Rely on the NUMA scheduling domain topology, instead of accessing NUMA topology information directly. There is basically no functional change, but in this way we ensure consistent use of the same topology information determined by the scheduling subsystem. Fixes: f6ce6b9 ("sched_ext: Do not enable LLC/NUMA optimizations when domains overlap") Signed-off-by: Andrea Righi <[email protected]> Signed-off-by: Tejun Heo <[email protected]>
1 parent f24d192 commit 4572541

File tree

1 file changed

+86
-28
lines changed

1 file changed

+86
-28
lines changed

kernel/sched/ext.c

Lines changed: 86 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3215,6 +3215,74 @@ static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, u64 flags)
32153215
goto retry;
32163216
}
32173217

3218+
/*
3219+
* Return the amount of CPUs in the same LLC domain of @cpu (or zero if the LLC
3220+
* domain is not defined).
3221+
*/
3222+
static unsigned int llc_weight(s32 cpu)
3223+
{
3224+
struct sched_domain *sd;
3225+
3226+
sd = rcu_dereference(per_cpu(sd_llc, cpu));
3227+
if (!sd)
3228+
return 0;
3229+
3230+
return sd->span_weight;
3231+
}
3232+
3233+
/*
3234+
* Return the cpumask representing the LLC domain of @cpu (or NULL if the LLC
3235+
* domain is not defined).
3236+
*/
3237+
static struct cpumask *llc_span(s32 cpu)
3238+
{
3239+
struct sched_domain *sd;
3240+
3241+
sd = rcu_dereference(per_cpu(sd_llc, cpu));
3242+
if (!sd)
3243+
return 0;
3244+
3245+
return sched_domain_span(sd);
3246+
}
3247+
3248+
/*
3249+
* Return the amount of CPUs in the same NUMA domain of @cpu (or zero if the
3250+
* NUMA domain is not defined).
3251+
*/
3252+
static unsigned int numa_weight(s32 cpu)
3253+
{
3254+
struct sched_domain *sd;
3255+
struct sched_group *sg;
3256+
3257+
sd = rcu_dereference(per_cpu(sd_numa, cpu));
3258+
if (!sd)
3259+
return 0;
3260+
sg = sd->groups;
3261+
if (!sg)
3262+
return 0;
3263+
3264+
return sg->group_weight;
3265+
}
3266+
3267+
/*
3268+
* Return the cpumask representing the NUMA domain of @cpu (or NULL if the NUMA
3269+
* domain is not defined).
3270+
*/
3271+
static struct cpumask *numa_span(s32 cpu)
3272+
{
3273+
struct sched_domain *sd;
3274+
struct sched_group *sg;
3275+
3276+
sd = rcu_dereference(per_cpu(sd_numa, cpu));
3277+
if (!sd)
3278+
return NULL;
3279+
sg = sd->groups;
3280+
if (!sg)
3281+
return NULL;
3282+
3283+
return sched_group_span(sg);
3284+
}
3285+
32183286
/*
32193287
* Return true if the LLC domains do not perfectly overlap with the NUMA
32203288
* domains, false otherwise.
@@ -3246,19 +3314,10 @@ static bool llc_numa_mismatch(void)
32463314
* overlapping, which is incorrect (as NUMA 1 has two distinct LLC
32473315
* domains).
32483316
*/
3249-
for_each_online_cpu(cpu) {
3250-
const struct cpumask *numa_cpus;
3251-
struct sched_domain *sd;
3252-
3253-
sd = rcu_dereference(per_cpu(sd_llc, cpu));
3254-
if (!sd)
3317+
for_each_online_cpu(cpu)
3318+
if (llc_weight(cpu) != numa_weight(cpu))
32553319
return true;
32563320

3257-
numa_cpus = cpumask_of_node(cpu_to_node(cpu));
3258-
if (sd->span_weight != cpumask_weight(numa_cpus))
3259-
return true;
3260-
}
3261-
32623321
return false;
32633322
}
32643323

@@ -3276,8 +3335,7 @@ static bool llc_numa_mismatch(void)
32763335
static void update_selcpu_topology(void)
32773336
{
32783337
bool enable_llc = false, enable_numa = false;
3279-
struct sched_domain *sd;
3280-
const struct cpumask *cpus;
3338+
unsigned int nr_cpus;
32813339
s32 cpu = cpumask_first(cpu_online_mask);
32823340

32833341
/*
@@ -3291,10 +3349,12 @@ static void update_selcpu_topology(void)
32913349
* CPUs.
32923350
*/
32933351
rcu_read_lock();
3294-
sd = rcu_dereference(per_cpu(sd_llc, cpu));
3295-
if (sd) {
3296-
if (sd->span_weight < num_online_cpus())
3352+
nr_cpus = llc_weight(cpu);
3353+
if (nr_cpus > 0) {
3354+
if (nr_cpus < num_online_cpus())
32973355
enable_llc = true;
3356+
pr_debug("sched_ext: LLC=%*pb weight=%u\n",
3357+
cpumask_pr_args(llc_span(cpu)), llc_weight(cpu));
32983358
}
32993359

33003360
/*
@@ -3306,9 +3366,13 @@ static void update_selcpu_topology(void)
33063366
* enabling both NUMA and LLC optimizations is unnecessary, as checking
33073367
* for an idle CPU in the same domain twice is redundant.
33083368
*/
3309-
cpus = cpumask_of_node(cpu_to_node(cpu));
3310-
if ((cpumask_weight(cpus) < num_online_cpus()) && llc_numa_mismatch())
3311-
enable_numa = true;
3369+
nr_cpus = numa_weight(cpu);
3370+
if (nr_cpus > 0) {
3371+
if (nr_cpus < num_online_cpus() && llc_numa_mismatch())
3372+
enable_numa = true;
3373+
pr_debug("sched_ext: NUMA=%*pb weight=%u\n",
3374+
cpumask_pr_args(numa_span(cpu)), numa_weight(cpu));
3375+
}
33123376
rcu_read_unlock();
33133377

33143378
pr_debug("sched_ext: LLC idle selection %s\n",
@@ -3360,7 +3424,6 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
33603424

33613425
*found = false;
33623426

3363-
33643427
/*
33653428
* This is necessary to protect llc_cpus.
33663429
*/
@@ -3379,15 +3442,10 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
33793442
*/
33803443
if (p->nr_cpus_allowed >= num_possible_cpus()) {
33813444
if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa))
3382-
numa_cpus = cpumask_of_node(cpu_to_node(prev_cpu));
3383-
3384-
if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc)) {
3385-
struct sched_domain *sd;
3445+
numa_cpus = numa_span(prev_cpu);
33863446

3387-
sd = rcu_dereference(per_cpu(sd_llc, prev_cpu));
3388-
if (sd)
3389-
llc_cpus = sched_domain_span(sd);
3390-
}
3447+
if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc))
3448+
llc_cpus = llc_span(prev_cpu);
33913449
}
33923450

33933451
/*

0 commit comments

Comments
 (0)