Skip to content

Commit ae6f3cc

Browse files
committed
x86: add support for AMD 0x80000026 CPUID leaf
This new leaf is similar to Intel 0x0b (already supported on AMD) and Intel 0x1f, but it comes with AMD-specific levels: CCX ("Complex") and CCD (Die, whose level type value differs from the Intel die level type). It is supported on Zen4 (at least Genoa EPYCs), where there is currently a single CCX per CCD, hence we get Dies (but no "Complex" groups). Signed-off-by: Brice Goglin <[email protected]>
1 parent 20c7945 commit ae6f3cc

File tree

3 files changed

+69
-33
lines changed

3 files changed

+69
-33
lines changed

doc/hwloc.doxy

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3553,6 +3553,8 @@ hwloc currently uses Groups for the following reasons:
35533553
<li>NUMA parents when memory locality does not match any existing object.</li>
35543554
<li>I/O parents when I/O locality does not match any existing object.</li>
35553555
<li>Distance-based groups made of close objects.</li>
3556+
<li>AMD Core Complex (CCX) (<tt>subtype</tt> is <tt>Complex</tt>, in the x86 backend),
3557+
but these objects are usually merged with the L3 caches or Dies.</li>
35563558
<li>AMD Bulldozer dual-core compute units (<tt>subtype</tt> is <tt>ComputeUnit</tt>, in the x86 backend),
35573559
but these objects are usually merged with the L2 caches.</li>
35583560
<li>Intel Extended Topology Enumeration levels (in the x86 backend).</li>

hwloc/topology-x86.c

Lines changed: 65 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ struct hwloc_x86_backend_data_s {
4040
int is_knl;
4141
int is_hybrid;
4242
int found_die_ids;
43+
int found_complex_ids;
4344
int found_unit_ids;
4445
int found_module_ids;
4546
int found_tile_ids;
@@ -215,7 +216,8 @@ struct procinfo {
215216
#define TILE 4
216217
#define MODULE 5
217218
#define DIE 6
218-
#define HWLOC_X86_PROCINFO_ID_NR 7
219+
#define COMPLEX 7
220+
#define HWLOC_X86_PROCINFO_ID_NR 8
219221
unsigned ids[HWLOC_X86_PROCINFO_ID_NR];
220222
unsigned *otherids;
221223
unsigned levels;
@@ -530,8 +532,10 @@ static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct
530532
}
531533
}
532534

533-
/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) */
534-
static void read_intel_cores_exttopoenum(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump)
535+
/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration)
536+
* or AMD complex/ccd from CPUID 0x80000026 (extended CPU topology)
537+
*/
538+
static void read_extended_topo(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump)
535539
{
536540
unsigned level, apic_nextshift, apic_type, apic_id = 0, apic_shift = 0, id;
537541
unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */
@@ -543,7 +547,7 @@ static void read_intel_cores_exttopoenum(struct hwloc_x86_backend_data_s *data,
543547
eax = leaf;
544548
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
545549
/* Intel specifies that 0x0b/0x1f return 0 in ecx[8:15] and 0 in eax/ebx for invalid subleaves
546-
* however AMD only says that 0x0b returns 0 in ebx[0:15].
550+
* however AMD only says that 0x80000026/0x0b returns 0 in ebx[0:15].
547551
* So use the common condition: 0 in ebx[0:15].
548552
*/
549553
if (!(ebx & 0xffff))
@@ -574,34 +578,49 @@ static void read_intel_cores_exttopoenum(struct hwloc_x86_backend_data_s *data,
574578
id);
575579
infos->apicid = apic_id;
576580
infos->otherids[level] = UINT_MAX;
577-
switch (apic_type) {
578-
case 1:
579-
threadid = id;
580-
break;
581-
case 2:
582-
infos->ids[CORE] = id;
583-
break;
584-
case 3:
585-
data->found_module_ids = 1;
586-
infos->ids[MODULE] = id;
587-
break;
588-
case 4:
589-
data->found_tile_ids = 1;
590-
infos->ids[TILE] = id;
591-
break;
592-
case 5:
593-
data->found_die_ids = 1;
594-
infos->ids[DIE] = id;
595-
break;
581+
switch (apic_type) {
582+
case 1:
583+
threadid = id;
584+
break;
585+
case 2:
586+
infos->ids[CORE] = id;
587+
break;
588+
case 3:
589+
if (leaf == 0x80000026) {
590+
data->found_complex_ids = 1;
591+
infos->ids[COMPLEX] = id;
592+
} else {
593+
data->found_module_ids = 1;
594+
infos->ids[MODULE] = id;
595+
}
596+
break;
597+
case 4:
598+
if (leaf == 0x80000026) {
599+
data->found_die_ids = 1;
600+
infos->ids[DIE] = id;
601+
} else {
602+
data->found_tile_ids = 1;
603+
infos->ids[TILE] = id;
604+
}
605+
break;
606+
case 5:
607+
if (leaf == 0x80000026) {
608+
goto unknown_type;
609+
} else {
610+
data->found_die_ids = 1;
611+
infos->ids[DIE] = id;
612+
}
613+
break;
596614
case 6:
597615
/* TODO: "DieGrp" on Intel */
598616
/* fallthrough */
599-
default:
600-
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
601-
infos->otherids[level] = apic_id >> apic_shift;
602-
break;
603-
}
604-
apic_shift = apic_nextshift;
617+
default:
618+
unknown_type:
619+
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
620+
infos->otherids[level] = apic_id >> apic_shift;
621+
break;
622+
}
623+
apic_shift = apic_nextshift;
605624
}
606625
infos->apicid = apic_id;
607626
infos->ids[PKG] = apic_id >> apic_shift;
@@ -751,18 +770,24 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
751770
read_amd_cores_topoext(data, infos, flags, src_cpuiddump);
752771
}
753772

754-
if ((cpuid_type == intel) && highest_cpuid >= 0x1f) {
773+
if ((cpuid_type == amd) && highest_ext_cpuid >= 0x80000026) {
774+
/* Get socket/die/complex/core/thread information from cpuid 0x80000026
775+
* (AMD Extended CPU Topology)
776+
*/
777+
read_extended_topo(data, infos, 0x80000026, src_cpuiddump);
778+
779+
} else if ((cpuid_type == intel) && highest_cpuid >= 0x1f) {
755780
/* Get package/die/module/tile/core/thread information from cpuid 0x1f
756781
* (Intel v2 Extended Topology Enumeration)
757782
*/
758-
read_intel_cores_exttopoenum(data, infos, 0x1f, src_cpuiddump);
783+
read_extended_topo(data, infos, 0x1f, src_cpuiddump);
759784

760785
} else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin)
761786
&& highest_cpuid >= 0x0b && has_x2apic(features)) {
762787
/* Get package/core/thread information from cpuid 0x0b
763788
* (Intel v1 Extended Topology Enumeration)
764789
*/
765-
read_intel_cores_exttopoenum(data, infos, 0x0b, src_cpuiddump);
790+
read_extended_topo(data, infos, 0x0b, src_cpuiddump);
766791
}
767792

768793
/**************************************
@@ -1063,6 +1088,13 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
10631088

10641089
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
10651090
if (fulldiscovery) {
1091+
if (data->found_unit_ids) {
1092+
/* Look for AMD Complex inside packages */
1093+
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
1094+
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
1095+
COMPLEX, "Complex",
1096+
HWLOC_GROUP_KIND_AMD_COMPLEX, 0);
1097+
}
10661098
if (data->found_unit_ids) {
10671099
/* Look for AMD Compute units inside packages */
10681100
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
@@ -1858,6 +1890,7 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology,
18581890
data->apicid_unique = 1;
18591891
data->src_cpuiddump_path = NULL;
18601892
data->found_die_ids = 0;
1893+
data->found_complex_ids = 0;
18611894
data->found_unit_ids = 0;
18621895
data->found_module_ids = 0;
18631896
data->found_tile_ids = 0;

include/private/private.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright © 2009 CNRS
3-
* Copyright © 2009-2022 Inria. All rights reserved.
3+
* Copyright © 2009-2023 Inria. All rights reserved.
44
* Copyright © 2009-2012, 2020 Université Bordeaux
55
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
66
*
@@ -519,6 +519,7 @@ extern char * hwloc_progname(struct hwloc_topology *topology);
519519
#define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */
520520
#define HWLOC_GROUP_KIND_S390_BOOK 110 /* subkind 0 is book, subkind 1 is drawer (group of books) */
521521
#define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */
522+
#define HWLOC_GROUP_KIND_AMD_COMPLEX 121 /* no subkind */
522523
/* then, OS-specific groups */
523524
#define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */
524525
#define HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN 210 /* subkind is SDL level */

0 commit comments

Comments
 (0)