Skip to content

Commit cd3db48

Browse files
authored
Merge pull request #559 from bgoglin/amd-cpuid80000026
x86: use AMD CPUID 0x80000026 leaf for better topology. This leaf describes Dies (CCD) and Complexes (CCX) between cores and packages. Hybrid CPU cores are also described by this leaf, but that's not included in this PR. This feature is likely only supported in Zen4 and later (tested on Genoa Epyc). Zen4 has 1 Die = 1 Complex of (8 cores + L3). Hence we now have Die objects on top of each L3 on these CPUs. Zen3 would have been more interesting (1 Die = 2 Complexes of (4 cores + L3)) but Zen3 doesn't support this CPUID leaf anyway. This PR comes with some generic x86 cleanups first, then the actual support for this new leaf.
2 parents 9e90851 + c2b03ae commit cd3db48

File tree

8 files changed

+2811
-87
lines changed

8 files changed

+2811
-87
lines changed

doc/hwloc.doxy

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3553,6 +3553,8 @@ hwloc currently uses Groups for the following reasons:
35533553
<li>NUMA parents when memory locality does not match any existing object.</li>
35543554
<li>I/O parents when I/O locality does not match any existing object.</li>
35553555
<li>Distance-based groups made of close objects.</li>
3556+
<li>AMD Core Complex (CCX) (<tt>subtype</tt> is <tt>Complex</tt>, in the x86 backend),
3557+
but these objects are usually merged with the L3 caches or Dies.</li>
35563558
<li>AMD Bulldozer dual-core compute units (<tt>subtype</tt> is <tt>ComputeUnit</tt>, in the x86 backend),
35573559
but these objects are usually merged with the L2 caches.</li>
35583560
<li>Intel Extended Topology Enumeration levels (in the x86 backend).</li>

hwloc/topology-x86.c

Lines changed: 161 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ struct hwloc_x86_backend_data_s {
3838
int apicid_unique;
3939
char *src_cpuiddump_path;
4040
int is_knl;
41+
int is_hybrid;
42+
int found_die_ids;
43+
int found_complex_ids;
44+
int found_unit_ids;
45+
int found_module_ids;
46+
int found_tile_ids;
4147
};
4248

4349
/************************************
@@ -210,7 +216,8 @@ struct procinfo {
210216
#define TILE 4
211217
#define MODULE 5
212218
#define DIE 6
213-
#define HWLOC_X86_PROCINFO_ID_NR 7
219+
#define COMPLEX 7
220+
#define HWLOC_X86_PROCINFO_ID_NR 8
214221
unsigned ids[HWLOC_X86_PROCINFO_ID_NR];
215222
unsigned *otherids;
216223
unsigned levels;
@@ -480,7 +487,7 @@ static void read_amd_cores_legacy(struct procinfo *infos, struct cpuiddump *src_
480487
}
481488

482489
/* AMD unit/node from CPUID 0x8000001e leaf (topoext) */
483-
static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump)
490+
static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump)
484491
{
485492
unsigned apic_id, nodes_per_proc = 0;
486493
unsigned eax, ebx, ecx, edx;
@@ -510,6 +517,7 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
510517
unsigned cores_per_unit;
511518
/* coreid was obtained from read_amd_cores_legacy() earlier */
512519
infos->ids[UNIT] = ebx & 0xff;
520+
data->found_unit_ids = 1;
513521
cores_per_unit = ((ebx >> 8) & 0xff) + 1;
514522
hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, infos->ids[NODE], cores_per_unit, infos->ids[UNIT]);
515523
/* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor).
@@ -524,10 +532,12 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
524532
}
525533
}
526534

527-
/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) */
528-
static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump)
535+
/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration)
536+
* or AMD complex/ccd from CPUID 0x80000026 (extended CPU topology)
537+
*/
538+
static void read_extended_topo(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump)
529539
{
530-
unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
540+
unsigned level, apic_nextshift, apic_type, apic_id = 0, apic_shift = 0, id;
531541
unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */
532542
unsigned eax, ebx, ecx = 0, edx;
533543
int apic_packageshift = 0;
@@ -536,7 +546,11 @@ static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf,
536546
ecx = level;
537547
eax = leaf;
538548
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
539-
if (!eax && !ebx)
549+
/* Intel specifies that 0x0b/0x1f return 0 in ecx[8:15] and 0 in eax/ebx for invalid subleaves
550+
* however AMD only says that 0x80000026/0x0b returns 0 in ebx[0:15].
551+
* So use the common condition: 0 in ebx[0:15].
552+
*/
553+
if (!(ebx & 0xffff))
540554
break;
541555
apic_packageshift = eax & 0x1f;
542556
}
@@ -549,43 +563,64 @@ static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf,
549563
ecx = level;
550564
eax = leaf;
551565
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
552-
if (!eax && !ebx)
566+
if (!(ebx & 0xffff))
553567
break;
554568
apic_nextshift = eax & 0x1f;
555-
apic_number = ebx & 0xffff;
556569
apic_type = (ecx & 0xff00) >> 8;
557570
apic_id = edx;
558571
id = (apic_id >> apic_shift) & ((1 << (apic_packageshift - apic_shift)) - 1);
559-
hwloc_debug("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n", apic_id, level, apic_nextshift, apic_number, apic_type, id);
572+
hwloc_debug("x2APIC %08x %u: nextshift %u nextnumber %2u type %u id %2u\n",
573+
apic_id,
574+
level,
575+
apic_nextshift,
576+
ebx & 0xffff /* number of threads in next level */,
577+
apic_type,
578+
id);
560579
infos->apicid = apic_id;
561580
infos->otherids[level] = UINT_MAX;
562-
switch (apic_type) {
563-
case 1:
564-
threadid = id;
565-
/* apic_number is the actual number of threads per core */
566-
break;
567-
case 2:
568-
infos->ids[CORE] = id;
569-
/* apic_number is the actual number of threads per die */
570-
break;
571-
case 3:
572-
infos->ids[MODULE] = id;
573-
/* apic_number is the actual number of threads per tile */
574-
break;
575-
case 4:
576-
infos->ids[TILE] = id;
577-
/* apic_number is the actual number of threads per die */
578-
break;
579-
case 5:
580-
infos->ids[DIE] = id;
581-
/* apic_number is the actual number of threads per package */
582-
break;
583-
default:
584-
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
585-
infos->otherids[level] = apic_id >> apic_shift;
586-
break;
587-
}
588-
apic_shift = apic_nextshift;
581+
switch (apic_type) {
582+
case 1:
583+
threadid = id;
584+
break;
585+
case 2:
586+
infos->ids[CORE] = id;
587+
break;
588+
case 3:
589+
if (leaf == 0x80000026) {
590+
data->found_complex_ids = 1;
591+
infos->ids[COMPLEX] = id;
592+
} else {
593+
data->found_module_ids = 1;
594+
infos->ids[MODULE] = id;
595+
}
596+
break;
597+
case 4:
598+
if (leaf == 0x80000026) {
599+
data->found_die_ids = 1;
600+
infos->ids[DIE] = id;
601+
} else {
602+
data->found_tile_ids = 1;
603+
infos->ids[TILE] = id;
604+
}
605+
break;
606+
case 5:
607+
if (leaf == 0x80000026) {
608+
goto unknown_type;
609+
} else {
610+
data->found_die_ids = 1;
611+
infos->ids[DIE] = id;
612+
}
613+
break;
614+
case 6:
615+
/* TODO: "DieGrp" on Intel */
616+
/* fallthrough */
617+
default:
618+
unknown_type:
619+
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
620+
infos->otherids[level] = apic_id >> apic_shift;
621+
break;
622+
}
623+
apic_shift = apic_nextshift;
589624
}
590625
infos->apicid = apic_id;
591626
infos->ids[PKG] = apic_id >> apic_shift;
@@ -704,12 +739,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
704739
}
705740

706741
if (highest_cpuid >= 0x1a && has_hybrid(features)) {
707-
/* Get hybrid cpu information from cpuid 0x1a */
742+
/* Get hybrid cpu information from cpuid 0x1a on Intel */
708743
eax = 0x1a;
709744
ecx = 0;
710745
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
711746
infos->hybridcoretype = eax >> 24;
712747
infos->hybridnativemodel = eax & 0xffffff;
748+
data->is_hybrid = 1;
713749
}
714750

715751
/*********************************************************************************
@@ -731,21 +767,27 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
731767
*
732768
* Only needed when x2apic supported if NUMA nodes are needed.
733769
*/
734-
read_amd_cores_topoext(infos, flags, src_cpuiddump);
770+
read_amd_cores_topoext(data, infos, flags, src_cpuiddump);
735771
}
736772

737-
if ((cpuid_type == intel) && highest_cpuid >= 0x1f) {
773+
if ((cpuid_type == amd) && highest_ext_cpuid >= 0x80000026) {
774+
/* Get socket/die/complex/core/thread information from cpuid 0x80000026
775+
* (AMD Extended CPU Topology)
776+
*/
777+
read_extended_topo(data, infos, 0x80000026, src_cpuiddump);
778+
779+
} else if ((cpuid_type == intel) && highest_cpuid >= 0x1f) {
738780
/* Get package/die/module/tile/core/thread information from cpuid 0x1f
739781
* (Intel v2 Extended Topology Enumeration)
740782
*/
741-
read_intel_cores_exttopoenum(infos, 0x1f, src_cpuiddump);
783+
read_extended_topo(data, infos, 0x1f, src_cpuiddump);
742784

743785
} else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin)
744786
&& highest_cpuid >= 0x0b && has_x2apic(features)) {
745787
/* Get package/core/thread information from cpuid 0x0b
746788
* (Intel v1 Extended Topology Enumeration)
747789
*/
748-
read_intel_cores_exttopoenum(infos, 0x0b, src_cpuiddump);
790+
read_extended_topo(data, infos, 0x0b, src_cpuiddump);
749791
}
750792

751793
/**************************************
@@ -1046,21 +1088,34 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
10461088

10471089
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
10481090
if (fulldiscovery) {
1049-
/* Look for AMD Compute units inside packages */
1050-
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
1051-
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
1052-
UNIT, "Compute Unit",
1053-
HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0);
1054-
/* Look for Intel Modules inside packages */
1055-
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
1056-
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
1057-
MODULE, "Module",
1058-
HWLOC_GROUP_KIND_INTEL_MODULE, 0);
1059-
/* Look for Intel Tiles inside packages */
1060-
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
1061-
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
1062-
TILE, "Tile",
1063-
HWLOC_GROUP_KIND_INTEL_TILE, 0);
1091+
if (data->found_unit_ids) {
1092+
/* Look for AMD Complex inside packages */
1093+
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
1094+
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
1095+
COMPLEX, "Complex",
1096+
HWLOC_GROUP_KIND_AMD_COMPLEX, 0);
1097+
}
1098+
if (data->found_unit_ids) {
1099+
/* Look for AMD Compute units inside packages */
1100+
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
1101+
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
1102+
UNIT, "Compute Unit",
1103+
HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0);
1104+
}
1105+
if (data->found_module_ids) {
1106+
/* Look for Intel Modules inside packages */
1107+
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
1108+
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
1109+
MODULE, "Module",
1110+
HWLOC_GROUP_KIND_INTEL_MODULE, 0);
1111+
}
1112+
if (data->found_tile_ids) {
1113+
/* Look for Intel Tiles inside packages */
1114+
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
1115+
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
1116+
TILE, "Tile",
1117+
HWLOC_GROUP_KIND_INTEL_TILE, 0);
1118+
}
10641119

10651120
/* Look for unknown objects */
10661121
if (infos[one].otherids) {
@@ -1094,7 +1149,8 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
10941149
}
10951150
}
10961151

1097-
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) {
1152+
if (data->found_die_ids
1153+
&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) {
10981154
/* Look for Intel Dies inside packages */
10991155
if (fulldiscovery) {
11001156
hwloc_bitmap_t die_cpuset;
@@ -1349,35 +1405,39 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long
13491405
if (data->apicid_unique) {
13501406
summarize(backend, infos, flags);
13511407

1352-
if (has_hybrid(features) && !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
1408+
if (data->is_hybrid
1409+
&& !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
13531410
/* use hybrid info for cpukinds */
1354-
hwloc_bitmap_t atomset = hwloc_bitmap_alloc();
1355-
hwloc_bitmap_t coreset = hwloc_bitmap_alloc();
1356-
for(i=0; i<nbprocs; i++) {
1357-
if (infos[i].hybridcoretype == 0x20)
1358-
hwloc_bitmap_set(atomset, i);
1359-
else if (infos[i].hybridcoretype == 0x40)
1360-
hwloc_bitmap_set(coreset, i);
1361-
}
1362-
/* register IntelAtom set if any */
1363-
if (!hwloc_bitmap_iszero(atomset)) {
1364-
struct hwloc_info_s infoattr;
1365-
infoattr.name = (char *) "CoreType";
1366-
infoattr.value = (char *) "IntelAtom";
1367-
hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
1368-
/* the cpuset is given to the callee */
1369-
} else {
1370-
hwloc_bitmap_free(atomset);
1371-
}
1372-
/* register IntelCore set if any */
1373-
if (!hwloc_bitmap_iszero(coreset)) {
1374-
struct hwloc_info_s infoattr;
1375-
infoattr.name = (char *) "CoreType";
1376-
infoattr.value = (char *) "IntelCore";
1377-
hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
1378-
/* the cpuset is given to the callee */
1379-
} else {
1380-
hwloc_bitmap_free(coreset);
1411+
if (cpuid_type == intel) {
1412+
/* Hybrid Intel */
1413+
hwloc_bitmap_t atomset = hwloc_bitmap_alloc();
1414+
hwloc_bitmap_t coreset = hwloc_bitmap_alloc();
1415+
for(i=0; i<nbprocs; i++) {
1416+
if (infos[i].hybridcoretype == 0x20)
1417+
hwloc_bitmap_set(atomset, i);
1418+
else if (infos[i].hybridcoretype == 0x40)
1419+
hwloc_bitmap_set(coreset, i);
1420+
}
1421+
/* register IntelAtom set if any */
1422+
if (!hwloc_bitmap_iszero(atomset)) {
1423+
struct hwloc_info_s infoattr;
1424+
infoattr.name = (char *) "CoreType";
1425+
infoattr.value = (char *) "IntelAtom";
1426+
hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
1427+
/* the cpuset is given to the callee */
1428+
} else {
1429+
hwloc_bitmap_free(atomset);
1430+
}
1431+
/* register IntelCore set if any */
1432+
if (!hwloc_bitmap_iszero(coreset)) {
1433+
struct hwloc_info_s infoattr;
1434+
infoattr.name = (char *) "CoreType";
1435+
infoattr.value = (char *) "IntelCore";
1436+
hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
1437+
/* the cpuset is given to the callee */
1438+
} else {
1439+
hwloc_bitmap_free(coreset);
1440+
}
13811441
}
13821442
}
13831443
}
@@ -1459,7 +1519,15 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
14591519
unsigned i;
14601520
unsigned highest_cpuid;
14611521
unsigned highest_ext_cpuid;
1462-
/* This stores cpuid features with the same indexing as Linux */
1522+
/* This stores cpuid features with the same indexing as Linux:
1523+
* [0] = 0x1 edx
1524+
* [1] = 0x80000001 edx
1525+
* [4] = 0x1 ecx
1526+
* [6] = 0x80000001 ecx
1527+
* [9] = 0x7/0 ebx
1528+
* [16] = 0x7/0 ecx
1529+
* [18] = 0x7/0 edx
1530+
*/
14631531
unsigned features[19] = { 0 };
14641532
struct procinfo *infos = NULL;
14651533
enum cpuid_type cpuid_type = unknown;
@@ -1579,6 +1647,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
15791647
ecx = 0;
15801648
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
15811649
features[9] = ebx;
1650+
features[16] = ecx;
15821651
features[18] = edx;
15831652
}
15841653

@@ -1816,9 +1885,15 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology,
18161885

18171886
/* default values */
18181887
data->is_knl = 0;
1888+
data->is_hybrid = 0;
18191889
data->apicid_set = hwloc_bitmap_alloc();
18201890
data->apicid_unique = 1;
18211891
data->src_cpuiddump_path = NULL;
1892+
data->found_die_ids = 0;
1893+
data->found_complex_ids = 0;
1894+
data->found_unit_ids = 0;
1895+
data->found_module_ids = 0;
1896+
data->found_tile_ids = 0;
18221897

18231898
src_cpuiddump_path = getenv("HWLOC_CPUID_PATH");
18241899
if (src_cpuiddump_path) {

0 commit comments

Comments
 (0)