From 889175da54a7769c23f8390b213cfcc22369c365 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Tue, 8 Oct 2024 22:55:07 +0200 Subject: [PATCH 1/8] Add cpu model init for Windows. --- compiler-rt/lib/builtins/cpu_model/aarch64.c | 2 + .../cpu_model/aarch64/fmv/windows.inc | 42 +++++++++++++++++++ .../lib/builtins/cpu_model/cpu_model.h | 10 ++++- 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c index ea2da23a95278..def11f88c4854 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64.c +++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c @@ -76,6 +76,8 @@ struct { #elif defined(__linux__) && __has_include(<sys/auxv.h>) #include "aarch64/fmv/mrs.inc" #include "aarch64/fmv/getauxval.inc" +#elif defined(_WIN32) +#include "aarch64/fmv/windows.inc" #else #include "aarch64/fmv/unimplemented.inc" #endif diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc new file mode 100644 index 0000000000000..fba4d8aed89bb --- /dev/null +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc @@ -0,0 +1,42 @@ +#ifndef _ARM64_ +#define _ARM64_ +#endif +#include <processthreadsapi.h> +#include <stdint.h> + +void __init_cpu_features_resolver(unsigned long hwcap, + const __ifunc_arg_t *arg) {} + +void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) + return; + + #define setCPUFeature(F) features |= 1ULL << F + + uint64_t features = 0; + + setCPUFeature(FEAT_INIT); + setCPUFeature(FEAT_FP); + + // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) + setCPUFeature(FEAT_CRC); + if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) +
setCPUFeature(FEAT_LSE); + if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) + setCPUFeature(FEAT_DOTPROD); + + if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { + setCPUFeature(FEAT_AES); + setCPUFeature(FEAT_SHA2); + setCPUFeature(FEAT_PMULL); + } + if (IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) + setCPUFeature(FEAT_JSCVT); + + if (IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE)) + setCPUFeature(FEAT_RCPC); + + __atomic_store(&__aarch64_cpu_features.features, &features, + __ATOMIC_RELAXED); +} diff --git a/compiler-rt/lib/builtins/cpu_model/cpu_model.h b/compiler-rt/lib/builtins/cpu_model/cpu_model.h index 924ca89cf60f5..5f9079c4e67ae 100644 --- a/compiler-rt/lib/builtins/cpu_model/cpu_model.h +++ b/compiler-rt/lib/builtins/cpu_model/cpu_model.h @@ -31,7 +31,15 @@ // We're choosing init priority 90 to force our constructors to run before any // constructors in the end user application (starting at priority 101). This // value matches the libgcc choice for the same functions. -#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90))) +#ifdef _WIN64 +// Contructor that replaces the ifunc runs currently with prio 10, see +// the LowerIFuncPass. The resolver of FMV depends on the cpu features so set +// the priority to 9. +#define CONSTRUCTOR_PRIOTITY 9 +#else +#define CONSTRUCTOR_PRIOTITY 90 +#endif +#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(CONSTRUCTOR_PRIOTITY))) #else // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that // this runs during initialization. 
From ab190811c62f089587b65a74a8c87cd94e6259a6 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Fri, 11 Oct 2024 17:52:58 +0200 Subject: [PATCH 2/8] Add more features, refactor --- .../cpu_model/aarch64/fmv/windows.inc | 81 ++++++++++++++++--- 1 file changed, 70 insertions(+), 11 deletions(-) diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc index fba4d8aed89bb..44ed8510b3515 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc @@ -4,6 +4,52 @@ #include <processthreadsapi.h> #include <stdint.h> +#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE +#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE +#define
PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE -1 +#endif +#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE +#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE -1 +#endif + void __init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) {} @@ -19,23 +65,36 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { setCPUFeature(FEAT_FP); // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent - if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) - setCPUFeature(FEAT_CRC); - if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) - setCPUFeature(FEAT_LSE); - if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) - setCPUFeature(FEAT_DOTPROD); - if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { setCPUFeature(FEAT_AES); setCPUFeature(FEAT_SHA2); setCPUFeature(FEAT_PMULL); } - if (IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) - setCPUFeature(FEAT_JSCVT); - if (IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE)) - setCPUFeature(FEAT_RCPC); + static const struct ProcessFeatureToFeatMap_t { + int WinApiFeature; + enum CPUFeatures CPUFeature; + } FeatMap[] = { + {PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE, FEAT_CRC}, + {PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE, FEAT_LSE}, + {PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE, FEAT_DOTPROD}, + {PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE, FEAT_JSCVT}, + {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC}, + {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE}, + {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2}, + {PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE, FEAT_SVE_AES}, + {PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BF16}, + {PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_EBF16}, + {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3}, + 
{PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4}, + {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM}, + {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM}, + }; + + for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); + I != E; ++I) + if ((FeatMap[I].WinApiFeature != -1) && IsProcessorFeaturePresent(FeatMap[I].WinApiFeature)) + setCPUFeature(FeatMap[I].CPUFeature); __atomic_store(&__aarch64_cpu_features.features, &features, __ATOMIC_RELAXED); From df55f291786f22610fdcbbc5b8ab87e0bd122bd1 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Fri, 11 Oct 2024 22:53:52 +0200 Subject: [PATCH 3/8] clang-format --- .../cpu_model/aarch64/fmv/windows.inc | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc index 44ed8510b3515..dc8eaff5fc529 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc @@ -53,11 +53,11 @@ void __init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) {} -void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { +void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; - #define setCPUFeature(F) features |= 1ULL << F +#define setCPUFeature(F) features |= 1ULL << F uint64_t features = 0; @@ -75,27 +75,26 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { int WinApiFeature; enum CPUFeatures CPUFeature; } FeatMap[] = { - {PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE, FEAT_CRC}, - {PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE, FEAT_LSE}, - {PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE, FEAT_DOTPROD}, - {PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE, FEAT_JSCVT}, - {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC}, - {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE}, - {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2}, - 
{PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE, FEAT_SVE_AES}, - {PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BF16}, - {PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_EBF16}, - {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3}, - {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4}, - {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM}, - {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM}, + {PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE, FEAT_CRC}, + {PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE, FEAT_LSE}, + {PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE, FEAT_DOTPROD}, + {PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE, FEAT_JSCVT}, + {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC}, + {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE}, + {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2}, + {PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE, FEAT_SVE_AES}, + {PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BF16}, + {PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_EBF16}, + {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3}, + {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4}, + {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM}, + {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM}, }; - for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); - I != E; ++I) - if ((FeatMap[I].WinApiFeature != -1) && IsProcessorFeaturePresent(FeatMap[I].WinApiFeature)) + for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); I != E; ++I) + if ((FeatMap[I].WinApiFeature != -1) && + IsProcessorFeaturePresent(FeatMap[I].WinApiFeature)) setCPUFeature(FeatMap[I].CPUFeature); - __atomic_store(&__aarch64_cpu_features.features, &features, - __ATOMIC_RELAXED); + __atomic_store(&__aarch64_cpu_features.features, &features, __ATOMIC_RELAXED); } From eda389814bc59c5d5063d25be526fc223f3ecc34 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Mon, 14 Oct 2024 17:48:40 +0200 Subject: [PATCH 4/8] Address review comments --- compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc | 5 
++--- compiler-rt/lib/builtins/cpu_model/cpu_model.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc index dc8eaff5fc529..109a0b9cdea00 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc @@ -1,6 +1,5 @@ -#ifndef _ARM64_ -#define _ARM64_ -#endif +#define WIN32_LEAN_AND_MEAN +#include <windows.h> #include <processthreadsapi.h> #include <stdint.h> diff --git a/compiler-rt/lib/builtins/cpu_model/cpu_model.h b/compiler-rt/lib/builtins/cpu_model/cpu_model.h index 5f9079c4e67ae..5c63175dfe9cf 100644 --- a/compiler-rt/lib/builtins/cpu_model/cpu_model.h +++ b/compiler-rt/lib/builtins/cpu_model/cpu_model.h @@ -31,7 +31,7 @@ // We're choosing init priority 90 to force our constructors to run before any // constructors in the end user application (starting at priority 101). This // value matches the libgcc choice for the same functions. -#ifdef _WIN64 +#ifdef _WIN32 // Contructor that replaces the ifunc runs currently with prio 10, see // the LowerIFuncPass. The resolver of FMV depends on the cpu features so set // the priority to 9. From 767064172738daea91eef97c81a1705e8144cdc6 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Tue, 15 Oct 2024 08:30:39 +0200 Subject: [PATCH 5/8] fix spelling --- compiler-rt/lib/builtins/cpu_model/cpu_model.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/builtins/cpu_model/cpu_model.h b/compiler-rt/lib/builtins/cpu_model/cpu_model.h index 5c63175dfe9cf..3bc4e63c4f25a 100644 --- a/compiler-rt/lib/builtins/cpu_model/cpu_model.h +++ b/compiler-rt/lib/builtins/cpu_model/cpu_model.h @@ -35,11 +35,11 @@ // Contructor that replaces the ifunc runs currently with prio 10, see // the LowerIFuncPass. The resolver of FMV depends on the cpu features so set // the priority to 9.
-#define CONSTRUCTOR_PRIOTITY 9 +#define CONSTRUCTOR_PRIORITY 9 #else -#define CONSTRUCTOR_PRIOTITY 90 +#define CONSTRUCTOR_PRIORITY 90 #endif -#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(CONSTRUCTOR_PRIOTITY))) +#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(CONSTRUCTOR_PRIORITY))) #else // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that // this runs during initialization. From 56ccce4fb033e86b2999742498732d3812ae1bf1 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Tue, 15 Oct 2024 08:32:31 +0200 Subject: [PATCH 6/8] Use SDK values even when they are not defined --- .../cpu_model/aarch64/fmv/windows.inc | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc index 109a0b9cdea00..d6368c29d9872 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc @@ -4,49 +4,49 @@ #include <stdint.h> #ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43 #endif #ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44 #endif #ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45 #endif #ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46 #endif #ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47 #endif #ifndef PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE 49 #endif #ifndef
PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50 #endif #ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51 #endif #ifndef PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE 52 #endif #ifndef PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE 53 #endif #ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55 #endif #ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56 #endif #ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57 #endif #ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58 #endif #ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE -1 +#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59 #endif void __init_cpu_features_resolver(unsigned long hwcap, @@ -91,8 +91,7 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { }; for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); I != E; ++I) - if ((FeatMap[I].WinApiFeature != -1) && - IsProcessorFeaturePresent(FeatMap[I].WinApiFeature)) + if (IsProcessorFeaturePresent(FeatMap[I].WinApiFeature)) setCPUFeature(FeatMap[I].CPUFeature); __atomic_store(&__aarch64_cpu_features.features, &features, __ATOMIC_RELAXED); From 8f784a5452f010a06c9644f38d989f0032c8177d Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: 
Fri, 8 Nov 2024 10:58:17 +0100 Subject: [PATCH 7/8] address review comments --- compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc index d6368c29d9872..dd30667deb686 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc @@ -65,7 +65,6 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { - setCPUFeature(FEAT_AES); setCPUFeature(FEAT_SHA2); setCPUFeature(FEAT_PMULL); } @@ -81,13 +80,13 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC}, {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE}, {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2}, - {PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE, FEAT_SVE_AES}, - {PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BF16}, - {PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_EBF16}, + {PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE, FEAT_SVE_PMULL128}, {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3}, {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4}, {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM}, {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM}, + // There is no I8MM flag, but when SVE_I8MM is available, I8MM is too. 
+ {PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE, FEAT_I8MM}, }; for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); I != E; ++I) From 90ed73d7073c2d2d68b2ec38e7bb10a7fa6a83c7 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Fri, 8 Nov 2024 12:38:59 +0100 Subject: [PATCH 8/8] drop unused defines --- .../lib/builtins/cpu_model/aarch64/fmv/windows.inc | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc index dd30667deb686..2ca18242fba3e 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc @@ -18,21 +18,9 @@ #ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE #define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47 #endif -#ifndef PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE 49 -#endif #ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE #define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50 #endif -#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51 -#endif -#ifndef PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE 52 -#endif -#ifndef PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE -#define PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE 53 -#endif #ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE #define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55 #endif