|
8 | 8 |
|
9 | 9 | static std::unordered_map<std::string, bool> xss_cpu_features;
|
10 | 10 |
|
11 |
| -inline void xss_cpu_init() |
| 11 | +static bool os_supports_avx() |
12 | 12 | {
|
13 |
| - int cpuInfo[4] = {0}; |
14 |
| - // Check AVX2 |
15 |
| - __cpuid(cpuInfo, 0); |
16 |
| - int nIds = cpuInfo[0]; |
| 13 | + int cpuInfo[4]; |
17 | 14 | __cpuid(cpuInfo, 1);
|
18 |
| - bool osxsave = (cpuInfo[2] & (1 << 27)) != 0; |
19 |
| - bool avx = (cpuInfo[2] & (1 << 28)) != 0; |
20 |
| - __cpuid(cpuInfo, 7); |
21 |
| - bool avx2 = (cpuInfo[1] & (1 << 5)) != 0; |
22 |
| - bool avx512f = (cpuInfo[1] & (1 << 16)) != 0; |
23 |
| - bool avx512dq = (cpuInfo[1] & (1 << 17)) != 0; |
24 |
| - bool avx512bw = (cpuInfo[1] & (1 << 30)) != 0; |
25 |
| - bool avx512vl = (cpuInfo[1] & (1 << 31)) != 0; |
26 |
| - bool avx512vbmi2 = (cpuInfo[2] & (1 << 6)) != 0; |
27 |
| - bool avx512fp16 = (cpuInfo[3] & (1 << 23)) != 0; |
28 |
| - // Store results |
29 |
| - xss_cpu_features["avx2"] = avx2; |
30 |
| - xss_cpu_features["avx512f"] = avx512f; |
31 |
| - xss_cpu_features["avx512dq"] = avx512dq; |
32 |
| - xss_cpu_features["avx512bw"] = avx512bw; |
33 |
| - xss_cpu_features["avx512vl"] = avx512vl; |
34 |
| - xss_cpu_features["avx512vbmi2"] = avx512vbmi2; |
35 |
| - xss_cpu_features["avx512fp16"] = avx512fp16; |
| 15 | + |
| 16 | + bool osxsaveSupported = (cpuInfo[2] & (1 << 27)) != 0; // OSXSAVE bit |
| 17 | + bool avxSupported = (cpuInfo[2] & (1 << 28)) != 0; // AVX bit |
| 18 | + if (!(avxSupported && osxsaveSupported)) |
| 19 | + return false; |
| 20 | + |
| 21 | + // Check XCR0[2:1] (XMM and YMM state) |
| 22 | + unsigned long long xcr0 = _xgetbv(0); |
| 23 | + return (xcr0 & 0x6) == 0x6; |
| 24 | +} |
| 25 | + |
| 26 | +static bool os_supports_avx512() |
| 27 | +{ |
| 28 | + if (!os_supports_avx()) |
| 29 | + return false; |
| 30 | + |
| 31 | + // Need XCR0[7:5] = opmask/ZMM/YMM state enabled |
| 32 | + unsigned long long xcr0 = _xgetbv(0); |
| 33 | + return (xcr0 & 0xE0) == 0xE0; |
| 34 | +} |
| 35 | + |
| 36 | +void xss_cpu_init() |
| 37 | +{ |
| 38 | + int cpuInfo[4]; |
| 39 | + __cpuid(cpuInfo, 0); |
| 40 | + int maxLeaf = cpuInfo[0]; |
| 41 | + |
| 42 | + bool hasAVX2 = false; |
| 43 | + bool hasAVX512F = false, hasAVX512DQ = false, hasAVX512BW = false, hasAVX512VL = false; |
| 44 | + bool hasAVX512VBMI2 = false, hasAVX512FP16 = false; |
| 45 | + |
| 46 | + if (maxLeaf >= 7) |
| 47 | + { |
| 48 | + __cpuidex(cpuInfo, 7, 0); |
| 49 | + |
| 50 | + // EBX bits |
| 51 | + hasAVX2 = os_supports_avx() && (cpuInfo[1] & (1 << 5)); |
| 52 | + hasAVX512F = os_supports_avx512() && (cpuInfo[1] & (1 << 16)); |
| 53 | + hasAVX512DQ = os_supports_avx512() && (cpuInfo[1] & (1 << 17)); |
| 54 | + hasAVX512BW = os_supports_avx512() && (cpuInfo[1] & (1 << 30)); |
| 55 | + hasAVX512VL = os_supports_avx512() && (cpuInfo[1] & (1 << 31)); |
| 56 | + |
| 57 | + // ECX bits |
| 58 | + hasAVX512VBMI2 = os_supports_avx512() && (cpuInfo[2] & (1 << 6)); |
| 59 | + |
| 60 | + // EDX bits |
| 61 | + hasAVX512FP16 = os_supports_avx512() && (cpuInfo[3] & (1 << 23)); |
| 62 | + } |
| 63 | + |
| 64 | + xss_cpu_features["avx2"] = hasAVX2; |
| 65 | + xss_cpu_features["avx512f"] = hasAVX512F; |
| 66 | + xss_cpu_features["avx512dq"] = hasAVX512DQ; |
| 67 | + xss_cpu_features["avx512bw"] = hasAVX512BW; |
| 68 | + xss_cpu_features["avx512vl"] = hasAVX512VL; |
| 69 | + xss_cpu_features["avx512vbmi2"] = hasAVX512VBMI2; |
| 70 | + xss_cpu_features["avx512fp16"] = hasAVX512FP16; |
36 | 71 | }
|
37 | 72 |
|
38 | 73 | inline bool xss_cpu_supports(const char *feature)
|
|
0 commit comments