Skip to content

Commit 4cb589e

Browse files
committed
Add checks for xsave and _xgetbv in cpuid checks
1 parent 3c007d7 commit 4cb589e

File tree

1 file changed

+58
-23
lines changed

1 file changed

+58
-23
lines changed

lib/x86simdsortcpuid.h

Lines changed: 58 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,66 @@
88

99
// Cache of detected CPU features, keyed by feature name (e.g. "avx2",
// "avx512f"). Filled in by xss_cpu_init(). Declared static, so every
// translation unit that includes this header gets its own copy.
static std::unordered_map<std::string, bool> xss_cpu_features;
1010

11-
inline void xss_cpu_init()
11+
static bool os_supports_avx()
1212
{
13-
int cpuInfo[4] = {0};
14-
// Check AVX2
15-
__cpuid(cpuInfo, 0);
16-
int nIds = cpuInfo[0];
13+
int cpuInfo[4];
1714
__cpuid(cpuInfo, 1);
18-
bool osxsave = (cpuInfo[2] & (1 << 27)) != 0;
19-
bool avx = (cpuInfo[2] & (1 << 28)) != 0;
20-
__cpuid(cpuInfo, 7);
21-
bool avx2 = (cpuInfo[1] & (1 << 5)) != 0;
22-
bool avx512f = (cpuInfo[1] & (1 << 16)) != 0;
23-
bool avx512dq = (cpuInfo[1] & (1 << 17)) != 0;
24-
bool avx512bw = (cpuInfo[1] & (1 << 30)) != 0;
25-
bool avx512vl = (cpuInfo[1] & (1 << 31)) != 0;
26-
bool avx512vbmi2 = (cpuInfo[2] & (1 << 6)) != 0;
27-
bool avx512fp16 = (cpuInfo[3] & (1 << 23)) != 0;
28-
// Store results
29-
xss_cpu_features["avx2"] = avx2;
30-
xss_cpu_features["avx512f"] = avx512f;
31-
xss_cpu_features["avx512dq"] = avx512dq;
32-
xss_cpu_features["avx512bw"] = avx512bw;
33-
xss_cpu_features["avx512vl"] = avx512vl;
34-
xss_cpu_features["avx512vbmi2"] = avx512vbmi2;
35-
xss_cpu_features["avx512fp16"] = avx512fp16;
15+
16+
bool osxsaveSupported = (cpuInfo[2] & (1 << 27)) != 0; // OSXSAVE bit
17+
bool avxSupported = (cpuInfo[2] & (1 << 28)) != 0; // AVX bit
18+
if (!(avxSupported && osxsaveSupported))
19+
return false;
20+
21+
// Check XCR0[2:1] (XMM and YMM state)
22+
unsigned long long xcr0 = _xgetbv(0);
23+
return (xcr0 & 0x6) == 0x6;
24+
}
25+
26+
static bool os_supports_avx512()
27+
{
28+
if (!os_supports_avx())
29+
return false;
30+
31+
// Need XCR0[7:5] = opmask/ZMM/YMM state enabled
32+
unsigned long long xcr0 = _xgetbv(0);
33+
return (xcr0 & 0xE0) == 0xE0;
34+
}
35+
36+
void xss_cpu_init()
37+
{
38+
int cpuInfo[4];
39+
__cpuid(cpuInfo, 0);
40+
int maxLeaf = cpuInfo[0];
41+
42+
bool hasAVX2 = false;
43+
bool hasAVX512F = false, hasAVX512DQ = false, hasAVX512BW = false, hasAVX512VL = false;
44+
bool hasAVX512VBMI2 = false, hasAVX512FP16 = false;
45+
46+
if (maxLeaf >= 7)
47+
{
48+
__cpuidex(cpuInfo, 7, 0);
49+
50+
// EBX bits
51+
hasAVX2 = os_supports_avx() && (cpuInfo[1] & (1 << 5));
52+
hasAVX512F = os_supports_avx512() && (cpuInfo[1] & (1 << 16));
53+
hasAVX512DQ = os_supports_avx512() && (cpuInfo[1] & (1 << 17));
54+
hasAVX512BW = os_supports_avx512() && (cpuInfo[1] & (1 << 30));
55+
hasAVX512VL = os_supports_avx512() && (cpuInfo[1] & (1 << 31));
56+
57+
// ECX bits
58+
hasAVX512VBMI2 = os_supports_avx512() && (cpuInfo[2] & (1 << 6));
59+
60+
// EDX bits
61+
hasAVX512FP16 = os_supports_avx512() && (cpuInfo[3] & (1 << 23));
62+
}
63+
64+
xss_cpu_features["avx2"] = hasAVX2;
65+
xss_cpu_features["avx512f"] = hasAVX512F;
66+
xss_cpu_features["avx512dq"] = hasAVX512DQ;
67+
xss_cpu_features["avx512bw"] = hasAVX512BW;
68+
xss_cpu_features["avx512vl"] = hasAVX512VL;
69+
xss_cpu_features["avx512vbmi2"] = hasAVX512VBMI2;
70+
xss_cpu_features["avx512fp16"] = hasAVX512FP16;
3671
}
3772

3873
inline bool xss_cpu_supports(const char *feature)

0 commit comments

Comments
 (0)