Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions compiler-rt/lib/builtins/cpu_model/aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ struct {
#elif defined(__linux__) && __has_include(<sys/auxv.h>)
#include "aarch64/fmv/mrs.inc"
#include "aarch64/fmv/getauxval.inc"
#elif defined(_WIN32)
#include "aarch64/fmv/windows.inc"
#else
#include "aarch64/fmv/unimplemented.inc"
#endif
Expand Down
98 changes: 98 additions & 0 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <processthreadsapi.h>
#include <stdint.h>

// Fallback definitions for ARM processor-feature identifiers.
//
// Each identifier is defined only when the headers included above have not
// already provided it, so a definition supplied by the SDK always takes
// precedence over the number hard-coded here.
//
// NOTE(review): the numeric values are presumably the ones published for
// IsProcessorFeaturePresent in the Windows SDK (winnt.h) -- confirm against the
// SDK before changing any of them. The values 48 and 54 are intentionally not
// given a name in this list.
#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
#endif
#ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44
#endif
#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45
#endif
#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46
#endif
#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
#endif
#ifndef PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE 49
#endif
#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
#endif
#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51
#endif
#ifndef PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE 52
#endif
#ifndef PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE 53
#endif
#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
#endif
#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
#endif
#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57
#endif
#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
#endif
#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
#endif

// Resolver-time initialisation hook. In this Windows implementation it does
// nothing: neither `hwcap` nor `arg` is read, and no state is modified.
void __init_cpu_features_resolver(unsigned long hwcap,
                                  const __ifunc_arg_t *arg) {
  // Both parameters are deliberately unused here.
  (void)hwcap;
  (void)arg;
}

// Fills in __aarch64_cpu_features.features by asking Windows which processor
// features are present (IsProcessorFeaturePresent). The function carries
// CONSTRUCTOR_ATTRIBUTE and returns early, without querying anything, when the
// feature word is already non-zero.
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
  // A non-zero feature word means the detection below has already been done.
  if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED) != 0)
    return;

#define setCPUFeature(F) features |= 1ULL << F

  // Pairs a Windows processor-feature identifier with the single feature bit
  // that is set when Windows reports that identifier as present.
  static const struct WinFeatureEntry {
    int WinApiFeature;
    enum CPUFeatures CPUFeature;
  } FeatureTable[] = {
      {PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE, FEAT_CRC},
      {PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE, FEAT_LSE},
      {PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE, FEAT_DOTPROD},
      {PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE, FEAT_JSCVT},
      {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC},
      {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE},
      {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2},
      {PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE, FEAT_SVE_AES},
      {PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BF16},
      {PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_EBF16},
      {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3},
      {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4},
      {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM},
      {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM},
  };
  const size_t EntryCount = sizeof(FeatureTable) / sizeof(FeatureTable[0]);

  // Accumulate locally and publish once at the end. The name `features` is
  // what the setCPUFeature macro above expands to.
  uint64_t features = 0;

  // These two bits are set unconditionally.
  setCPUFeature(FEAT_INIT);
  setCPUFeature(FEAT_FP);

  // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
  // A single Windows "crypto" flag turns on three separate feature bits.
  if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
    setCPUFeature(FEAT_AES);
    setCPUFeature(FEAT_SHA2);
    setCPUFeature(FEAT_PMULL);
  }

  // Every remaining flag maps one-to-one onto a feature bit.
  for (size_t Idx = 0; Idx < EntryCount; ++Idx) {
    const struct WinFeatureEntry *Entry = &FeatureTable[Idx];
    if (IsProcessorFeaturePresent(Entry->WinApiFeature)) {
      setCPUFeature(Entry->CPUFeature);
    }
  }

  __atomic_store_n(&__aarch64_cpu_features.features, features,
                   __ATOMIC_RELAXED);
}
10 changes: 9 additions & 1 deletion compiler-rt/lib/builtins/cpu_model/cpu_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,15 @@
// We're choosing init priority 90 to force our constructors to run before any
// constructors in the end user application (starting at priority 101). This
// value matches the libgcc choice for the same functions.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90)))
#ifdef _WIN32
// The constructor that replaces the ifunc currently runs with priority 10; see
// the LowerIFuncPass. The resolver of FMV depends on the cpu features so set
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wondered what this was about, if it was some feature in the compiler I was unaware of - but then I saw the other PR filed at the same time :-)

// the priority to 9.
#define CONSTRUCTOR_PRIORITY 9
#else
#define CONSTRUCTOR_PRIORITY 90
#endif
#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(CONSTRUCTOR_PRIORITY)))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
Expand Down
Loading