Skip to content

Commit 590bb3e

Browse files
[AArch64] Improve host feature detection. (#160410)
SVE depends on a combination of host support and operating system support. Sometimes those don't line up with the detected host CPU name, so make sure SVE is disabled when it isn't available. Implement this for both Windows and Linux. (We don't have a codepath for other operating systems. If someone wants to implement this, it should be possible to adapt the FMV (function multi-versioning) code from compiler-rt.) While I'm here, also add support for detecting other Windows CPU features. For Windows, declare the PF_ARM_* constants ourselves so the code builds on older SDKs; we also do this in compiler-rt.
1 parent 4650f85 commit 590bb3e

File tree

1 file changed

+63
-2
lines changed

1 file changed

+63
-2
lines changed

llvm/lib/TargetParser/Host.cpp

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2277,20 +2277,81 @@ StringMap<bool> sys::getHostCPUFeatures() {
22772277
uint32_t Sha2 = CAP_SHA1 | CAP_SHA2;
22782278
Features["aes"] = (crypto & Aes) == Aes;
22792279
Features["sha2"] = (crypto & Sha2) == Sha2;
2280+
2281+
// Even if an underlying core supports SVE, it might not be available if
2282+
// it's disabled by the OS, or some other layer. Disable SVE if we don't
2283+
// detect support at runtime.
2284+
if (!Features.contains("sve"))
2285+
Features["sve"] = false;
22802286
#endif
22812287

22822288
return Features;
22832289
}
22842290
#elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64) || \
22852291
defined(__arm64ec__) || defined(_M_ARM64EC))
2292+
#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
2293+
#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
2294+
#endif
2295+
#ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
2296+
#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44
2297+
#endif
2298+
#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
2299+
#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45
2300+
#endif
2301+
#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE
2302+
#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46
2303+
#endif
2304+
#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
2305+
#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
2306+
#endif
2307+
#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
2308+
#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
2309+
#endif
2310+
#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
2311+
#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
2312+
#endif
2313+
#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
2314+
#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
2315+
#endif
2316+
#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
2317+
#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57
2318+
#endif
2319+
#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
2320+
#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
2321+
#endif
2322+
#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
2323+
#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
2324+
#endif
22862325
StringMap<bool> sys::getHostCPUFeatures() {
22872326
StringMap<bool> Features;
22882327

22892328
// If we're asking the OS at runtime, believe what the OS says
2290-
Features["neon"] =
2291-
IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE);
22922329
Features["crc"] =
22932330
IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
2331+
Features["lse"] =
2332+
IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE);
2333+
Features["dotprod"] =
2334+
IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);
2335+
Features["jsconv"] =
2336+
IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE);
2337+
Features["rcpc"] =
2338+
IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE);
2339+
Features["sve"] =
2340+
IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE);
2341+
Features["sve2"] =
2342+
IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE);
2343+
Features["sve-aes"] =
2344+
IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE);
2345+
Features["sve-sha3"] =
2346+
IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE);
2347+
Features["sve-sm4"] =
2348+
IsProcessorFeaturePresent(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE);
2349+
Features["f32mm"] =
2350+
IsProcessorFeaturePresent(PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE);
2351+
Features["f64mm"] =
2352+
IsProcessorFeaturePresent(PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE);
2353+
Features["i8mm"] =
2354+
IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE);
22942355

22952356
// Avoid inferring "crypto" means more than the traditional AES + SHA2
22962357
bool TradCrypto =

0 commit comments

Comments
 (0)