@@ -38,6 +38,12 @@ struct hwloc_x86_backend_data_s {
38
38
int apicid_unique ;
39
39
char * src_cpuiddump_path ;
40
40
int is_knl ;
41
+ int is_hybrid ;
42
+ int found_die_ids ;
43
+ int found_complex_ids ;
44
+ int found_unit_ids ;
45
+ int found_module_ids ;
46
+ int found_tile_ids ;
41
47
};
42
48
43
49
/************************************
@@ -210,7 +216,8 @@ struct procinfo {
210
216
#define TILE 4
211
217
#define MODULE 5
212
218
#define DIE 6
213
- #define HWLOC_X86_PROCINFO_ID_NR 7
219
+ #define COMPLEX 7
220
+ #define HWLOC_X86_PROCINFO_ID_NR 8
214
221
unsigned ids [HWLOC_X86_PROCINFO_ID_NR ];
215
222
unsigned * otherids ;
216
223
unsigned levels ;
@@ -480,7 +487,7 @@ static void read_amd_cores_legacy(struct procinfo *infos, struct cpuiddump *src_
480
487
}
481
488
482
489
/* AMD unit/node from CPUID 0x8000001e leaf (topoext) */
483
- static void read_amd_cores_topoext (struct procinfo * infos , unsigned long flags , struct cpuiddump * src_cpuiddump )
490
+ static void read_amd_cores_topoext (struct hwloc_x86_backend_data_s * data , struct procinfo * infos , unsigned long flags , struct cpuiddump * src_cpuiddump )
484
491
{
485
492
unsigned apic_id , nodes_per_proc = 0 ;
486
493
unsigned eax , ebx , ecx , edx ;
@@ -510,6 +517,7 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
510
517
unsigned cores_per_unit ;
511
518
/* coreid was obtained from read_amd_cores_legacy() earlier */
512
519
infos -> ids [UNIT ] = ebx & 0xff ;
520
+ data -> found_unit_ids = 1 ;
513
521
cores_per_unit = ((ebx >> 8 ) & 0xff ) + 1 ;
514
522
hwloc_debug ("topoext %08x, %u nodes, node %u, %u cores in unit %u\n" , apic_id , nodes_per_proc , infos -> ids [NODE ], cores_per_unit , infos -> ids [UNIT ]);
515
523
/* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor).
@@ -524,10 +532,12 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
524
532
}
525
533
}
526
534
527
- /* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) */
528
- static void read_intel_cores_exttopoenum (struct procinfo * infos , unsigned leaf , struct cpuiddump * src_cpuiddump )
535
+ /* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration)
536
+ * or AMD complex/ccd from CPUID 0x80000026 (extended CPU topology)
537
+ */
538
+ static void read_extended_topo (struct hwloc_x86_backend_data_s * data , struct procinfo * infos , unsigned leaf , struct cpuiddump * src_cpuiddump )
529
539
{
530
- unsigned level , apic_nextshift , apic_number , apic_type , apic_id = 0 , apic_shift = 0 , id ;
540
+ unsigned level , apic_nextshift , apic_type , apic_id = 0 , apic_shift = 0 , id ;
531
541
unsigned threadid __hwloc_attribute_unused = 0 ; /* shut-up compiler */
532
542
unsigned eax , ebx , ecx = 0 , edx ;
533
543
int apic_packageshift = 0 ;
@@ -536,7 +546,11 @@ static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf,
536
546
ecx = level ;
537
547
eax = leaf ;
538
548
cpuid_or_from_dump (& eax , & ebx , & ecx , & edx , src_cpuiddump );
539
- if (!eax && !ebx )
549
+ /* Intel specifies that 0x0b/0x1f return 0 in ecx[8:15] and 0 in eax/ebx for invalid subleaves;
550
+ * however, AMD only says that 0x80000026/0x0b returns 0 in ebx[0:15].
551
+ * So use the common condition: 0 in ebx[0:15].
552
+ */
553
+ if (!(ebx & 0xffff ))
540
554
break ;
541
555
apic_packageshift = eax & 0x1f ;
542
556
}
@@ -549,43 +563,64 @@ static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf,
549
563
ecx = level ;
550
564
eax = leaf ;
551
565
cpuid_or_from_dump (& eax , & ebx , & ecx , & edx , src_cpuiddump );
552
- if (!eax && ! ebx )
566
+ if (!( ebx & 0xffff ) )
553
567
break ;
554
568
apic_nextshift = eax & 0x1f ;
555
- apic_number = ebx & 0xffff ;
556
569
apic_type = (ecx & 0xff00 ) >> 8 ;
557
570
apic_id = edx ;
558
571
id = (apic_id >> apic_shift ) & ((1 << (apic_packageshift - apic_shift )) - 1 );
559
- hwloc_debug ("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n" , apic_id , level , apic_nextshift , apic_number , apic_type , id );
572
+ hwloc_debug ("x2APIC %08x %u: nextshift %u nextnumber %2u type %u id %2u\n" ,
573
+ apic_id ,
574
+ level ,
575
+ apic_nextshift ,
576
+ ebx & 0xffff /* number of threads in next level */ ,
577
+ apic_type ,
578
+ id );
560
579
infos -> apicid = apic_id ;
561
580
infos -> otherids [level ] = UINT_MAX ;
562
- switch (apic_type ) {
563
- case 1 :
564
- threadid = id ;
565
- /* apic_number is the actual number of threads per core */
566
- break ;
567
- case 2 :
568
- infos -> ids [CORE ] = id ;
569
- /* apic_number is the actual number of threads per die */
570
- break ;
571
- case 3 :
572
- infos -> ids [MODULE ] = id ;
573
- /* apic_number is the actual number of threads per tile */
574
- break ;
575
- case 4 :
576
- infos -> ids [TILE ] = id ;
577
- /* apic_number is the actual number of threads per die */
578
- break ;
579
- case 5 :
580
- infos -> ids [DIE ] = id ;
581
- /* apic_number is the actual number of threads per package */
582
- break ;
583
- default :
584
- hwloc_debug ("x2APIC %u: unknown type %u\n" , level , apic_type );
585
- infos -> otherids [level ] = apic_id >> apic_shift ;
586
- break ;
587
- }
588
- apic_shift = apic_nextshift ;
581
+ switch (apic_type ) {
582
+ case 1 :
583
+ threadid = id ;
584
+ break ;
585
+ case 2 :
586
+ infos -> ids [CORE ] = id ;
587
+ break ;
588
+ case 3 :
589
+ if (leaf == 0x80000026 ) {
590
+ data -> found_complex_ids = 1 ;
591
+ infos -> ids [COMPLEX ] = id ;
592
+ } else {
593
+ data -> found_module_ids = 1 ;
594
+ infos -> ids [MODULE ] = id ;
595
+ }
596
+ break ;
597
+ case 4 :
598
+ if (leaf == 0x80000026 ) {
599
+ data -> found_die_ids = 1 ;
600
+ infos -> ids [DIE ] = id ;
601
+ } else {
602
+ data -> found_tile_ids = 1 ;
603
+ infos -> ids [TILE ] = id ;
604
+ }
605
+ break ;
606
+ case 5 :
607
+ if (leaf == 0x80000026 ) {
608
+ goto unknown_type ;
609
+ } else {
610
+ data -> found_die_ids = 1 ;
611
+ infos -> ids [DIE ] = id ;
612
+ }
613
+ break ;
614
+ case 6 :
615
+ /* TODO: "DieGrp" on Intel */
616
+ /* fallthrough */
617
+ default :
618
+ unknown_type :
619
+ hwloc_debug ("x2APIC %u: unknown type %u\n" , level , apic_type );
620
+ infos -> otherids [level ] = apic_id >> apic_shift ;
621
+ break ;
622
+ }
623
+ apic_shift = apic_nextshift ;
589
624
}
590
625
infos -> apicid = apic_id ;
591
626
infos -> ids [PKG ] = apic_id >> apic_shift ;
@@ -704,12 +739,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
704
739
}
705
740
706
741
if (highest_cpuid >= 0x1a && has_hybrid (features )) {
707
- /* Get hybrid cpu information from cpuid 0x1a */
742
+ /* Get hybrid cpu information from cpuid 0x1a on Intel */
708
743
eax = 0x1a ;
709
744
ecx = 0 ;
710
745
cpuid_or_from_dump (& eax , & ebx , & ecx , & edx , src_cpuiddump );
711
746
infos -> hybridcoretype = eax >> 24 ;
712
747
infos -> hybridnativemodel = eax & 0xffffff ;
748
+ data -> is_hybrid = 1 ;
713
749
}
714
750
715
751
/*********************************************************************************
@@ -731,21 +767,27 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
731
767
*
732
768
* Only needed when x2apic supported if NUMA nodes are needed.
733
769
*/
734
- read_amd_cores_topoext (infos , flags , src_cpuiddump );
770
+ read_amd_cores_topoext (data , infos , flags , src_cpuiddump );
735
771
}
736
772
737
- if ((cpuid_type == intel ) && highest_cpuid >= 0x1f ) {
773
+ if ((cpuid_type == amd ) && highest_ext_cpuid >= 0x80000026 ) {
774
+ /* Get socket/die/complex/core/thread information from cpuid 0x80000026
775
+ * (AMD Extended CPU Topology)
776
+ */
777
+ read_extended_topo (data , infos , 0x80000026 , src_cpuiddump );
778
+
779
+ } else if ((cpuid_type == intel ) && highest_cpuid >= 0x1f ) {
738
780
/* Get package/die/module/tile/core/thread information from cpuid 0x1f
739
781
* (Intel v2 Extended Topology Enumeration)
740
782
*/
741
- read_intel_cores_exttopoenum ( infos , 0x1f , src_cpuiddump );
783
+ read_extended_topo ( data , infos , 0x1f , src_cpuiddump );
742
784
743
785
} else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin )
744
786
&& highest_cpuid >= 0x0b && has_x2apic (features )) {
745
787
/* Get package/core/thread information from cpuid 0x0b
746
788
* (Intel v1 Extended Topology Enumeration)
747
789
*/
748
- read_intel_cores_exttopoenum ( infos , 0x0b , src_cpuiddump );
790
+ read_extended_topo ( data , infos , 0x0b , src_cpuiddump );
749
791
}
750
792
751
793
/**************************************
@@ -1046,21 +1088,34 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
1046
1088
1047
1089
if (hwloc_filter_check_keep_object_type (topology , HWLOC_OBJ_GROUP )) {
1048
1090
if (fulldiscovery ) {
1049
- /* Look for AMD Compute units inside packages */
1050
- hwloc_bitmap_copy (remaining_cpuset , complete_cpuset );
1051
- hwloc_x86_add_groups (topology , infos , nbprocs , remaining_cpuset ,
1052
- UNIT , "Compute Unit" ,
1053
- HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT , 0 );
1054
- /* Look for Intel Modules inside packages */
1055
- hwloc_bitmap_copy (remaining_cpuset , complete_cpuset );
1056
- hwloc_x86_add_groups (topology , infos , nbprocs , remaining_cpuset ,
1057
- MODULE , "Module" ,
1058
- HWLOC_GROUP_KIND_INTEL_MODULE , 0 );
1059
- /* Look for Intel Tiles inside packages */
1060
- hwloc_bitmap_copy (remaining_cpuset , complete_cpuset );
1061
- hwloc_x86_add_groups (topology , infos , nbprocs , remaining_cpuset ,
1062
- TILE , "Tile" ,
1063
- HWLOC_GROUP_KIND_INTEL_TILE , 0 );
1091
+ if (data -> found_complex_ids ) {
1092
+ /* Look for AMD Complexes inside packages; gated on found_complex_ids (set when CPUID 0x80000026 reports a level of type 3), not on the compute-unit flag */
1093
+ hwloc_bitmap_copy (remaining_cpuset , complete_cpuset );
1094
+ hwloc_x86_add_groups (topology , infos , nbprocs , remaining_cpuset ,
1095
+ COMPLEX , "Complex" ,
1096
+ HWLOC_GROUP_KIND_AMD_COMPLEX , 0 );
1097
+ }
1098
+ if (data -> found_unit_ids ) {
1099
+ /* Look for AMD Compute units inside packages */
1100
+ hwloc_bitmap_copy (remaining_cpuset , complete_cpuset );
1101
+ hwloc_x86_add_groups (topology , infos , nbprocs , remaining_cpuset ,
1102
+ UNIT , "Compute Unit" ,
1103
+ HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT , 0 );
1104
+ }
1105
+ if (data -> found_module_ids ) {
1106
+ /* Look for Intel Modules inside packages */
1107
+ hwloc_bitmap_copy (remaining_cpuset , complete_cpuset );
1108
+ hwloc_x86_add_groups (topology , infos , nbprocs , remaining_cpuset ,
1109
+ MODULE , "Module" ,
1110
+ HWLOC_GROUP_KIND_INTEL_MODULE , 0 );
1111
+ }
1112
+ if (data -> found_tile_ids ) {
1113
+ /* Look for Intel Tiles inside packages */
1114
+ hwloc_bitmap_copy (remaining_cpuset , complete_cpuset );
1115
+ hwloc_x86_add_groups (topology , infos , nbprocs , remaining_cpuset ,
1116
+ TILE , "Tile" ,
1117
+ HWLOC_GROUP_KIND_INTEL_TILE , 0 );
1118
+ }
1064
1119
1065
1120
/* Look for unknown objects */
1066
1121
if (infos [one ].otherids ) {
@@ -1094,7 +1149,8 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
1094
1149
}
1095
1150
}
1096
1151
1097
- if (hwloc_filter_check_keep_object_type (topology , HWLOC_OBJ_DIE )) {
1152
+ if (data -> found_die_ids
1153
+ && hwloc_filter_check_keep_object_type (topology , HWLOC_OBJ_DIE )) {
1098
1154
/* Look for Dies inside packages (ids come from Intel CPUID 0x1f or AMD CPUID 0x80000026) */
1099
1155
if (fulldiscovery ) {
1100
1156
hwloc_bitmap_t die_cpuset ;
@@ -1349,35 +1405,39 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long
1349
1405
if (data -> apicid_unique ) {
1350
1406
summarize (backend , infos , flags );
1351
1407
1352
- if (has_hybrid (features ) && !(topology -> flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS )) {
1408
+ if (data -> is_hybrid
1409
+ && !(topology -> flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS )) {
1353
1410
/* use hybrid info for cpukinds */
1354
- hwloc_bitmap_t atomset = hwloc_bitmap_alloc ();
1355
- hwloc_bitmap_t coreset = hwloc_bitmap_alloc ();
1356
- for (i = 0 ; i < nbprocs ; i ++ ) {
1357
- if (infos [i ].hybridcoretype == 0x20 )
1358
- hwloc_bitmap_set (atomset , i );
1359
- else if (infos [i ].hybridcoretype == 0x40 )
1360
- hwloc_bitmap_set (coreset , i );
1361
- }
1362
- /* register IntelAtom set if any */
1363
- if (!hwloc_bitmap_iszero (atomset )) {
1364
- struct hwloc_info_s infoattr ;
1365
- infoattr .name = (char * ) "CoreType" ;
1366
- infoattr .value = (char * ) "IntelAtom" ;
1367
- hwloc_internal_cpukinds_register (topology , atomset , HWLOC_CPUKIND_EFFICIENCY_UNKNOWN , & infoattr , 1 , 0 );
1368
- /* the cpuset is given to the callee */
1369
- } else {
1370
- hwloc_bitmap_free (atomset );
1371
- }
1372
- /* register IntelCore set if any */
1373
- if (!hwloc_bitmap_iszero (coreset )) {
1374
- struct hwloc_info_s infoattr ;
1375
- infoattr .name = (char * ) "CoreType" ;
1376
- infoattr .value = (char * ) "IntelCore" ;
1377
- hwloc_internal_cpukinds_register (topology , coreset , HWLOC_CPUKIND_EFFICIENCY_UNKNOWN , & infoattr , 1 , 0 );
1378
- /* the cpuset is given to the callee */
1379
- } else {
1380
- hwloc_bitmap_free (coreset );
1411
+ if (cpuid_type == intel ) {
1412
+ /* Hybrid Intel */
1413
+ hwloc_bitmap_t atomset = hwloc_bitmap_alloc ();
1414
+ hwloc_bitmap_t coreset = hwloc_bitmap_alloc ();
1415
+ for (i = 0 ; i < nbprocs ; i ++ ) {
1416
+ if (infos [i ].hybridcoretype == 0x20 )
1417
+ hwloc_bitmap_set (atomset , i );
1418
+ else if (infos [i ].hybridcoretype == 0x40 )
1419
+ hwloc_bitmap_set (coreset , i );
1420
+ }
1421
+ /* register IntelAtom set if any */
1422
+ if (!hwloc_bitmap_iszero (atomset )) {
1423
+ struct hwloc_info_s infoattr ;
1424
+ infoattr .name = (char * ) "CoreType" ;
1425
+ infoattr .value = (char * ) "IntelAtom" ;
1426
+ hwloc_internal_cpukinds_register (topology , atomset , HWLOC_CPUKIND_EFFICIENCY_UNKNOWN , & infoattr , 1 , 0 );
1427
+ /* the cpuset is given to the callee */
1428
+ } else {
1429
+ hwloc_bitmap_free (atomset );
1430
+ }
1431
+ /* register IntelCore set if any */
1432
+ if (!hwloc_bitmap_iszero (coreset )) {
1433
+ struct hwloc_info_s infoattr ;
1434
+ infoattr .name = (char * ) "CoreType" ;
1435
+ infoattr .value = (char * ) "IntelCore" ;
1436
+ hwloc_internal_cpukinds_register (topology , coreset , HWLOC_CPUKIND_EFFICIENCY_UNKNOWN , & infoattr , 1 , 0 );
1437
+ /* the cpuset is given to the callee */
1438
+ } else {
1439
+ hwloc_bitmap_free (coreset );
1440
+ }
1381
1441
}
1382
1442
}
1383
1443
}
@@ -1459,7 +1519,15 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
1459
1519
unsigned i ;
1460
1520
unsigned highest_cpuid ;
1461
1521
unsigned highest_ext_cpuid ;
1462
- /* This stores cpuid features with the same indexing as Linux */
1522
+ /* This stores cpuid features with the same indexing as Linux:
1523
+ * [0] = 0x1 edx
1524
+ * [1] = 0x80000001 edx
1525
+ * [4] = 0x1 ecx
1526
+ * [6] = 0x80000001 ecx
1527
+ * [9] = 0x7/0 ebx
1528
+ * [16] = 0x7/0 ecx
1529
+ * [18] = 0x7/0 edx
1530
+ */
1463
1531
unsigned features [19 ] = { 0 };
1464
1532
struct procinfo * infos = NULL ;
1465
1533
enum cpuid_type cpuid_type = unknown ;
@@ -1579,6 +1647,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
1579
1647
ecx = 0 ;
1580
1648
cpuid_or_from_dump (& eax , & ebx , & ecx , & edx , src_cpuiddump );
1581
1649
features [9 ] = ebx ;
1650
+ features [16 ] = ecx ;
1582
1651
features [18 ] = edx ;
1583
1652
}
1584
1653
@@ -1816,9 +1885,15 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology,
1816
1885
1817
1886
/* default values */
1818
1887
data -> is_knl = 0 ;
1888
+ data -> is_hybrid = 0 ;
1819
1889
data -> apicid_set = hwloc_bitmap_alloc ();
1820
1890
data -> apicid_unique = 1 ;
1821
1891
data -> src_cpuiddump_path = NULL ;
1892
+ data -> found_die_ids = 0 ;
1893
+ data -> found_complex_ids = 0 ;
1894
+ data -> found_unit_ids = 0 ;
1895
+ data -> found_module_ids = 0 ;
1896
+ data -> found_tile_ids = 0 ;
1822
1897
1823
1898
src_cpuiddump_path = getenv ("HWLOC_CPUID_PATH" );
1824
1899
if (src_cpuiddump_path ) {
0 commit comments