Skip to content

Commit 2c88d45

Browse files
Alison Schofield authored and Peter Zijlstra committed
x86, sched: Treat Intel SNC topology as default, COD as exception
Commit 1340ccf ("x86,sched: Allow topologies where NUMA nodes share an LLC") added a vendor and model specific check to never call topology_sane() for Intel Skylake Server systems where NUMA nodes share an LLC. Intel Ice Lake and Sapphire Rapids CPUs also enumerate an LLC that is shared by multiple NUMA nodes. The LLC on these CPUs is shared for off-package data access but private to the NUMA node for on-package access. Rather than managing a list of allowable SNC topologies, make this SNC topology the default, and treat Intel's Cluster-On-Die (COD) topology as the exception. In SNC mode, Sky Lake, Ice Lake, and Sapphire Rapids servers do not emit this warning: sched: CPU #3's llc-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency. Suggested-by: Peter Zijlstra (Intel) <[email protected]> Signed-off-by: Alison Schofield <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Acked-by: Dave Hansen <[email protected]> Cc: [email protected] Link: https://lkml.kernel.org/r/[email protected]
1 parent 99cb64d commit 2c88d45

File tree

1 file changed

+46
-44
lines changed

1 file changed

+46
-44
lines changed

arch/x86/kernel/smpboot.c

Lines changed: 46 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -458,29 +458,52 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
458458
return false;
459459
}
460460

461+
static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
462+
{
463+
if (c->phys_proc_id == o->phys_proc_id &&
464+
c->cpu_die_id == o->cpu_die_id)
465+
return true;
466+
return false;
467+
}
468+
461469
/*
462-
* Define snc_cpu[] for SNC (Sub-NUMA Cluster) CPUs.
470+
* Unlike the other levels, we do not enforce keeping a
471+
* multicore group inside a NUMA node. If this happens, we will
472+
* discard the MC level of the topology later.
473+
*/
474+
static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
475+
{
476+
if (c->phys_proc_id == o->phys_proc_id)
477+
return true;
478+
return false;
479+
}
480+
481+
/*
482+
* Define intel_cod_cpu[] for Intel COD (Cluster-on-Die) CPUs.
463483
*
464-
* These are Intel CPUs that enumerate an LLC that is shared by
465-
* multiple NUMA nodes. The LLC on these systems is shared for
466-
* off-package data access but private to the NUMA node (half
467-
* of the package) for on-package access.
484+
* Any Intel CPU that has multiple nodes per package and does not
485+
* match intel_cod_cpu[] has the SNC (Sub-NUMA Cluster) topology.
468486
*
469-
* CPUID (the source of the information about the LLC) can only
470-
* enumerate the cache as being shared *or* unshared, but not
471-
* this particular configuration. The CPU in this case enumerates
472-
* the cache to be shared across the entire package (spanning both
473-
* NUMA nodes).
487+
* When in SNC mode, these CPUs enumerate an LLC that is shared
488+
* by multiple NUMA nodes. The LLC is shared for off-package data
489+
* access but private to the NUMA node (half of the package) for
490+
* on-package access. CPUID (the source of the information about
491+
* the LLC) can only enumerate the cache as shared or unshared,
492+
* but not this particular configuration.
474493
*/
475494

476-
static const struct x86_cpu_id snc_cpu[] = {
477-
X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL),
495+
static const struct x86_cpu_id intel_cod_cpu[] = {
496+
X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, 0), /* COD */
497+
X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, 0), /* COD */
498+
X86_MATCH_INTEL_FAM6_MODEL(ANY, 1), /* SNC */
478499
{}
479500
};
480501

481502
static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
482503
{
504+
const struct x86_cpu_id *id = x86_match_cpu(intel_cod_cpu);
483505
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
506+
bool intel_snc = id && id->driver_data;
484507

485508
/* Do not match if we do not have a valid APICID for cpu: */
486509
if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
@@ -495,32 +518,12 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
495518
* means 'c' does not share the LLC of 'o'. This will be
496519
* reflected to userspace.
497520
*/
498-
if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu))
521+
if (match_pkg(c, o) && !topology_same_node(c, o) && intel_snc)
499522
return false;
500523

501524
return topology_sane(c, o, "llc");
502525
}
503526

504-
/*
505-
* Unlike the other levels, we do not enforce keeping a
506-
* multicore group inside a NUMA node. If this happens, we will
507-
* discard the MC level of the topology later.
508-
*/
509-
static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
510-
{
511-
if (c->phys_proc_id == o->phys_proc_id)
512-
return true;
513-
return false;
514-
}
515-
516-
static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
517-
{
518-
if ((c->phys_proc_id == o->phys_proc_id) &&
519-
(c->cpu_die_id == o->cpu_die_id))
520-
return true;
521-
return false;
522-
}
523-
524527

525528
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
526529
static inline int x86_sched_itmt_flags(void)
@@ -592,14 +595,23 @@ void set_cpu_sibling_map(int cpu)
592595
for_each_cpu(i, cpu_sibling_setup_mask) {
593596
o = &cpu_data(i);
594597

598+
if (match_pkg(c, o) && !topology_same_node(c, o))
599+
x86_has_numa_in_package = true;
600+
595601
if ((i == cpu) || (has_smt && match_smt(c, o)))
596602
link_mask(topology_sibling_cpumask, cpu, i);
597603

598604
if ((i == cpu) || (has_mp && match_llc(c, o)))
599605
link_mask(cpu_llc_shared_mask, cpu, i);
600606

607+
if ((i == cpu) || (has_mp && match_die(c, o)))
608+
link_mask(topology_die_cpumask, cpu, i);
601609
}
602610

611+
threads = cpumask_weight(topology_sibling_cpumask(cpu));
612+
if (threads > __max_smt_threads)
613+
__max_smt_threads = threads;
614+
603615
/*
604616
* This needs a separate iteration over the cpus because we rely on all
605617
* topology_sibling_cpumask links to be set-up.
@@ -613,8 +625,7 @@ void set_cpu_sibling_map(int cpu)
613625
/*
614626
* Does this new cpu bringup a new core?
615627
*/
616-
if (cpumask_weight(
617-
topology_sibling_cpumask(cpu)) == 1) {
628+
if (threads == 1) {
618629
/*
619630
* for each core in package, increment
620631
* the booted_cores for this new cpu
@@ -631,16 +642,7 @@ void set_cpu_sibling_map(int cpu)
631642
} else if (i != cpu && !c->booted_cores)
632643
c->booted_cores = cpu_data(i).booted_cores;
633644
}
634-
if (match_pkg(c, o) && !topology_same_node(c, o))
635-
x86_has_numa_in_package = true;
636-
637-
if ((i == cpu) || (has_mp && match_die(c, o)))
638-
link_mask(topology_die_cpumask, cpu, i);
639645
}
640-
641-
threads = cpumask_weight(topology_sibling_cpumask(cpu));
642-
if (threads > __max_smt_threads)
643-
__max_smt_threads = threads;
644646
}
645647

646648
/* maps the cpu to the sched domain representing multi-core */

0 commit comments

Comments
 (0)