Skip to content

Commit a418fa7

Browse files
authored
[win][aarch64] Add support for detecting the Host CPU on Arm64 Windows (#151596)
Uses the `CP 4000` registry keys under `HKLM\HARDWARE\DESCRIPTION\System\CentralProcessor\*` to get the Implementer and Part, which are then provided to a modified form of `getHostCPUNameForARM` to map to a CPU. On my local Surface Pro 11, `llc --version` reports:
```
> .\build\bin\llc.exe --version
LLVM (http://llvm.org/):
  LLVM version 22.0.0git
  Optimized build with assertions.
  Default target: aarch64-pc-windows-msvc
  Host CPU: oryon-1
```
1 parent a1209d8 commit a418fa7

File tree

3 files changed

+191
-39
lines changed

3 files changed

+191
-39
lines changed

llvm/include/llvm/TargetParser/Host.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef LLVM_TARGETPARSER_HOST_H
1414
#define LLVM_TARGETPARSER_HOST_H
1515

16+
#include "llvm/ADT/ArrayRef.h"
1617
#include "llvm/Support/Compiler.h"
1718
#include <string>
1819

@@ -63,6 +64,8 @@ namespace detail {
6364
/// Helper functions to extract HostCPUName from /proc/cpuinfo on linux.
6465
LLVM_ABI StringRef getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent);
6566
LLVM_ABI StringRef getHostCPUNameForARM(StringRef ProcCpuinfoContent);
67+
/// Helper to extract HostCPUName from cached copies of the MIDR_EL1 register
/// (as provided by the registry on Windows). \p PrimaryCpuInfo supplies the
/// implementer, variant and primary part; \p UniqueCpuInfos supplies the part
/// numbers of all cores.
LLVM_ABI StringRef getHostCPUNameForARM(uint64_t PrimaryCpuInfo,
68+
ArrayRef<uint64_t> UniqueCpuInfos);
6669
LLVM_ABI StringRef getHostCPUNameForS390x(StringRef ProcCpuinfoContent);
6770
LLVM_ABI StringRef getHostCPUNameForRISCV(StringRef ProcCpuinfoContent);
6871
LLVM_ABI StringRef getHostCPUNameForSPARC(StringRef ProcCpuinfoContent);

llvm/lib/TargetParser/Host.cpp

Lines changed: 161 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "llvm/TargetParser/Host.h"
14+
#include "llvm/ADT/STLFunctionalExtras.h"
1415
#include "llvm/ADT/SmallVector.h"
16+
#include "llvm/ADT/StringExtras.h"
1517
#include "llvm/ADT/StringMap.h"
1618
#include "llvm/ADT/StringRef.h"
1719
#include "llvm/ADT/StringSwitch.h"
@@ -167,35 +169,10 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
167169
.Default(generic);
168170
}
169171

170-
StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
171-
// The cpuid register on arm is not accessible from user space. On Linux,
172-
// it is exposed through the /proc/cpuinfo file.
173-
174-
// Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
175-
// in all cases.
176-
SmallVector<StringRef, 32> Lines;
177-
ProcCpuinfoContent.split(Lines, '\n');
178-
179-
// Look for the CPU implementer and hardware lines, and store the CPU part
180-
// numbers found.
181-
StringRef Implementer;
182-
StringRef Hardware;
183-
SmallVector<StringRef, 32> Parts;
184-
for (StringRef Line : Lines) {
185-
if (Line.consume_front("CPU implementer"))
186-
Implementer = Line.ltrim("\t :");
187-
else if (Line.consume_front("Hardware"))
188-
Hardware = Line.ltrim("\t :");
189-
else if (Line.consume_front("CPU part"))
190-
Parts.emplace_back(Line.ltrim("\t :"));
191-
}
192-
193-
// Last `Part' seen, in case we don't analyse all `Parts' parsed.
194-
StringRef Part = Parts.empty() ? StringRef() : Parts.back();
195-
196-
// Remove duplicate `Parts'.
197-
llvm::sort(Parts);
198-
Parts.erase(llvm::unique(Parts), Parts.end());
172+
StringRef
173+
getHostCPUNameForARMFromComponents(StringRef Implementer, StringRef Hardware,
174+
StringRef Part, ArrayRef<StringRef> Parts,
175+
function_ref<unsigned()> GetVariant) {
199176

200177
auto MatchBigLittle = [](auto const &Parts, StringRef Big, StringRef Little) {
201178
if (Parts.size() == 2)
@@ -343,21 +320,17 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
343320
if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
344321
// The Exynos chips have a convoluted ID scheme that doesn't seem to follow
345322
// any predictive pattern across variants and parts.
346-
unsigned Variant = 0, Part = 0;
347323

348324
// Look for the CPU variant line, whose value is a 1 digit hexadecimal
349325
// number, corresponding to the Variant bits in the CP15/C0 register.
350-
for (auto I : Lines)
351-
if (I.consume_front("CPU variant"))
352-
I.ltrim("\t :").getAsInteger(0, Variant);
326+
unsigned Variant = GetVariant();
353327

354-
// Look for the CPU part line, whose value is a 3 digit hexadecimal
355-
// number, corresponding to the PartNum bits in the CP15/C0 register.
356-
for (auto I : Lines)
357-
if (I.consume_front("CPU part"))
358-
I.ltrim("\t :").getAsInteger(0, Part);
328+
// Convert the CPU part line, whose value is a 3 digit hexadecimal number,
329+
// corresponding to the PartNum bits in the CP15/C0 register.
330+
unsigned PartAsInt;
331+
Part.getAsInteger(0, PartAsInt);
359332

360-
unsigned Exynos = (Variant << 12) | Part;
333+
unsigned Exynos = (Variant << 12) | PartAsInt;
361334
switch (Exynos) {
362335
default:
363336
// Default by falling through to Exynos M3.
@@ -416,6 +389,86 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
416389
return "generic";
417390
}
418391

392+
StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
  // User space cannot read the ARM CPU ID register directly; on Linux the
  // same information is published as text in /proc/cpuinfo, which is what
  // this overload parses.

  // Break the content into lines. 32 inline slots are expected to be enough
  // to reach the "CPU part" line without a heap allocation.
  SmallVector<StringRef, 32> Lines;
  ProcCpuinfoContent.split(Lines, '\n');

  // Strips the separator between a consumed key and its value.
  auto ValueOf = [](StringRef Rest) { return Rest.ltrim("\t :"); };

  // Collect the implementer, the hardware string and every part number.
  StringRef Implementer;
  StringRef Hardware;
  SmallVector<StringRef, 32> Parts;
  for (StringRef Entry : Lines) {
    if (Entry.consume_front("CPU implementer")) {
      Implementer = ValueOf(Entry);
      continue;
    }
    if (Entry.consume_front("Hardware")) {
      Hardware = ValueOf(Entry);
      continue;
    }
    if (Entry.consume_front("CPU part"))
      Parts.emplace_back(ValueOf(Entry));
  }

  // Remember the last part listed before the list is reordered; it is used
  // when the full set of parts is not analysed.
  StringRef Part;
  if (!Parts.empty())
    Part = Parts.back();

  // Reduce the list to its distinct part numbers.
  llvm::sort(Parts);
  Parts.erase(llvm::unique(Parts), Parts.end());

  // The variant is looked up lazily, only by the callee paths that need it.
  // Every "CPU variant" line is parsed, so the last parsable one wins.
  auto GetVariant = [&Lines]() {
    unsigned Result = 0;
    for (StringRef Entry : Lines) {
      if (!Entry.consume_front("CPU variant"))
        continue;
      Entry.ltrim("\t :").getAsInteger(0, Result);
    }
    return Result;
  };

  return getHostCPUNameForARMFromComponents(Implementer, Hardware, Part, Parts,
                                            GetVariant);
}
433+
434+
StringRef sys::detail::getHostCPUNameForARM(uint64_t PrimaryCpuInfo,
                                            ArrayRef<uint64_t> UniqueCpuInfos) {
  // On Windows, the registry provides cached copies of the MIDR_EL1 register.
  //
  // The fields are extracted with explicit shifts and masks rather than
  // through a union of bit-fields: bit-field allocation order is
  // implementation-defined and reading a union member other than the one last
  // written is not portable, whereas shifting yields the same result on every
  // host this function is compiled for.
  //
  // Field layout decoded here (low 32 bits of the value):
  //   [3:0]   Revision
  //   [15:4]  PartNum
  //   [19:16] Architecture
  //   [23:20] Variant
  //   [31:24] Implementer
  auto PartNumOf = [](uint64_t Midr) { return (Midr >> 4) & 0xfff; };
  auto VariantOf = [](uint64_t Midr) { return (Midr >> 20) & 0xf; };
  auto ImplementerOf = [](uint64_t Midr) { return (Midr >> 24) & 0xff; };

  // Formats a field as "0x"-prefixed, lower-case, zero-padded hex, which is
  // the textual form the string-based helper compares against.
  auto ToHex = [](uint64_t Value, unsigned Width) {
    return "0x" + utohexstr(Value, /*LowerCase=*/true, Width);
  };

  // Owns the formatted part numbers; `Parts` below only refers into it, so it
  // must stay alive until the helper returns.
  SmallVector<std::string> PartsHolder;
  PartsHolder.reserve(UniqueCpuInfos.size());
  for (uint64_t Info : UniqueCpuInfos)
    PartsHolder.push_back(ToHex(PartNumOf(Info), /*Width=*/3));

  SmallVector<StringRef> Parts;
  Parts.reserve(PartsHolder.size());
  for (const std::string &Part : PartsHolder)
    Parts.push_back(Part);

  const std::string Implementer =
      ToHex(ImplementerOf(PrimaryCpuInfo), /*Width=*/2);
  const std::string PrimaryPart = ToHex(PartNumOf(PrimaryCpuInfo), /*Width=*/3);
  const unsigned Variant = static_cast<unsigned>(VariantOf(PrimaryCpuInfo));

  // No "Hardware" string is available from the register value, so it is
  // passed as empty.
  return getHostCPUNameForARMFromComponents(Implementer, /*Hardware=*/"",
                                            PrimaryPart, Parts,
                                            [Variant]() { return Variant; });
}
471+
419472
namespace {
420473
StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
421474
switch (Id) {
@@ -1450,6 +1503,75 @@ StringRef sys::getHostCPUName() {
14501503
return "generic";
14511504
}
14521505

1506+
#elif defined(_M_ARM64) || defined(_M_ARM64EC)
1507+
1508+
/// Detects the host CPU on Arm64 Windows by reading the cached MIDR_EL1 value
/// ("CP 4000") stored under each sub key of
/// HKLM\HARDWARE\DESCRIPTION\System\CentralProcessor. Returns "generic" when
/// no usable value is found.
StringRef sys::getHostCPUName() {
  constexpr char CentralProcessorKeyName[] =
      "HARDWARE\\DESCRIPTION\\System\\CentralProcessor";
  // Sub key names are simple numbers ("0", "1", etc.) so 10 chars should be
  // enough for the slash and name.
  constexpr size_t SubKeyNameMaxSize = ARRAYSIZE(CentralProcessorKeyName) + 10;

  SmallVector<uint64_t> Values;
  // Both are initialized so that no indeterminate value can ever be compared
  // or handed to the lookup below, even if no sub key wins the comparison.
  uint64_t PrimaryCpuInfo = 0;
  char PrimaryPartKeyName[SubKeyNameMaxSize] = {};
  DWORD PrimaryPartKeyNameSize = 0;
  HKEY CentralProcessorKey;
  if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, CentralProcessorKeyName, 0, KEY_READ,
                    &CentralProcessorKey) == ERROR_SUCCESS) {
    for (unsigned Index = 0; Index < UINT32_MAX; ++Index) {
      char SubKeyName[SubKeyNameMaxSize];
      DWORD SubKeySize = SubKeyNameMaxSize;
      HKEY SubKey;
      // Stop at the first sub key that cannot be enumerated or opened; this is
      // treated as "no more sub keys".
      if (RegEnumKeyExA(CentralProcessorKey, Index, SubKeyName, &SubKeySize,
                        nullptr, nullptr, nullptr,
                        nullptr) != ERROR_SUCCESS ||
          RegOpenKeyExA(CentralProcessorKey, SubKeyName, 0, KEY_READ,
                        &SubKey) != ERROR_SUCCESS)
        break;

      // The "CP 4000" registry key contains a cached copy of the MIDR_EL1
      // register.
      uint64_t RegValue;
      DWORD ActualType;
      DWORD RegValueSize = sizeof(RegValue);
      if ((RegQueryValueExA(SubKey, "CP 4000", nullptr, &ActualType,
                            reinterpret_cast<LPBYTE>(&RegValue),
                            &RegValueSize) == ERROR_SUCCESS) &&
          (ActualType == REG_QWORD) && RegValueSize == sizeof(RegValue)) {
        // Assume that the part with the "highest" reg key name is the primary
        // part (to match the way that Linux's cpuinfo is written). Win32
        // makes no guarantees about the order of sub keys, so we have to
        // compare the names: a longer name wins, and names of equal length
        // are compared byte-wise.
        if (PrimaryPartKeyNameSize < SubKeySize ||
            (PrimaryPartKeyNameSize == SubKeySize &&
             ::memcmp(SubKeyName, PrimaryPartKeyName, SubKeySize) > 0)) {
          PrimaryCpuInfo = RegValue;
          // Copy one extra byte to carry the name's terminator along.
          ::memcpy(PrimaryPartKeyName, SubKeyName, SubKeySize + 1);
          PrimaryPartKeyNameSize = SubKeySize;
        }
        if (!llvm::is_contained(Values, RegValue))
          Values.push_back(RegValue);
      }
      RegCloseKey(SubKey);
    }
    RegCloseKey(CentralProcessorKey);
  }

  if (Values.empty())
    return "generic";

  // Win32 makes no guarantees about the order of sub keys, so sort to ensure
  // reproducibility.
  llvm::sort(Values);

  return detail::getHostCPUNameForARM(PrimaryCpuInfo, Values);
}
1574+
14531575
#elif defined(__APPLE__) && defined(__powerpc__)
14541576
StringRef sys::getHostCPUName() {
14551577
host_basic_info_data_t hostInfo;

llvm/unittests/TargetParser/Host.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,28 @@ Serial : 0000000000000000
5959

6060
EXPECT_EQ(sys::detail::getHostCPUNameForARM(CortexA9ProcCpuinfo),
6161
"cortex-a9");
62+
EXPECT_EQ(sys::detail::getHostCPUNameForARM(
63+
0x4100c090, ArrayRef<uint64_t>{0x4100c090, 0x4100c090}),
64+
"cortex-a9");
6265
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
6366
"CPU part : 0xc0f"),
6467
"cortex-a15");
68+
EXPECT_EQ(sys::detail::getHostCPUNameForARM(0x4100c0f0,
69+
ArrayRef<uint64_t>{0x4100c0f0}),
70+
"cortex-a15");
6571
// Verify that both CPU implementer and CPU part are checked:
6672
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x40\n"
6773
"CPU part : 0xc0f"),
6874
"generic");
75+
EXPECT_EQ(sys::detail::getHostCPUNameForARM(0x4000c0f0,
76+
ArrayRef<uint64_t>{0x4000c0f0}),
77+
"generic");
6978
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n"
7079
"CPU part : 0x06f"),
7180
"krait");
81+
EXPECT_EQ(sys::detail::getHostCPUNameForARM(0x510006f0,
82+
ArrayRef<uint64_t>{0x510006f0}),
83+
"krait");
7284
}
7385

7486
TEST(getLinuxHostCPUName, AArch64) {
@@ -126,10 +138,16 @@ TEST(getLinuxHostCPUName, AArch64) {
126138
"CPU part : 0xd85\n"
127139
"CPU part : 0xd87"),
128140
"cortex-x925");
141+
EXPECT_EQ(sys::detail::getHostCPUNameForARM(
142+
0x4100d850, ArrayRef<uint64_t>{0x4100d850, 0x4100d870}),
143+
"cortex-x925");
129144
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
130145
"CPU part : 0xd87\n"
131146
"CPU part : 0xd85"),
132147
"cortex-x925");
148+
EXPECT_EQ(sys::detail::getHostCPUNameForARM(
149+
0x4100d870, ArrayRef<uint64_t>{0x4100d870, 0x4100d850}),
150+
"cortex-x925");
133151
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n"
134152
"CPU part : 0xc00"),
135153
"falkor");
@@ -200,16 +218,25 @@ CPU architecture: 8
200218
"CPU variant : 0xc\n"
201219
"CPU part : 0xafe"),
202220
"exynos-m3");
221+
EXPECT_EQ(sys::detail::getHostCPUNameForARM(
222+
0x53c0afe0, ArrayRef<uint64_t>{0x53c0afe0, 0x5300d050}),
223+
"exynos-m3");
203224
// Verify Exynos M3.
204225
EXPECT_EQ(sys::detail::getHostCPUNameForARM(ExynosProcCpuInfo +
205226
"CPU variant : 0x1\n"
206227
"CPU part : 0x002"),
207228
"exynos-m3");
229+
EXPECT_EQ(sys::detail::getHostCPUNameForARM(
230+
0x53100020, ArrayRef<uint64_t>{0x53100020, 0x5300d050}),
231+
"exynos-m3");
208232
// Verify Exynos M4.
209233
EXPECT_EQ(sys::detail::getHostCPUNameForARM(ExynosProcCpuInfo +
210234
"CPU variant : 0x1\n"
211235
"CPU part : 0x003"),
212236
"exynos-m4");
237+
EXPECT_EQ(sys::detail::getHostCPUNameForARM(
238+
0x53100030, ArrayRef<uint64_t>{0x53100030, 0x5300d050}),
239+
"exynos-m4");
213240

214241
const std::string ThunderX2T99ProcCpuInfo = R"(
215242
processor : 0

0 commit comments

Comments
 (0)