@@ -46,6 +46,23 @@ static inline void cpuid(int output[4], int functionNumber) {
4646#endif
4747}
4848
// Multi-platform XGETBV "intrinsic".
//
// Executes XGETBV with ECX = ctr and returns the 64-bit result that the
// instruction leaves in EDX:EAX (EDX is the high half, EAX the low half).
// Per the Intel SDM, ctr == 0 selects XCR0, the mask of register state the
// operating system has enabled.
//
// Precondition: XGETBV raises an invalid-opcode fault unless the OS has set
// OSXSAVE (CPUID leaf 1, ECX bit 27). Callers must test that bit first.
static inline int64_t xgetbv(int ctr) {
#if defined(__GNUC__) || defined(__clang__)
    // use inline assembly, Gnu/AT&T syntax
    uint32_t a, d;
    // volatile: an asm with outputs is otherwise treated as a pure function
    // of its inputs, so the optimizer may merge, drop or hoist it -- including
    // above the caller's OSXSAVE check, where the instruction would fault.
    __asm__ __volatile__("xgetbv" : "=a"(a), "=d"(d) : "c"(ctr));
    return (int64_t)(((uint64_t)d << 32) | a);

#elif (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200)
    // Microsoft or Intel compiler supporting _xgetbv intrinsic
    // (takes an unsigned index and returns an unsigned 64-bit value)
    return (int64_t)_xgetbv((unsigned int)ctr);

#else
#error Unsupported compiler
#endif
}
65+
4966// Utility function to horizontally add 8 32-bit integers
5067static inline int hsum_i32_8 (const __m256i a ) {
5168 const __m128i sum128 = _mm_add_epi32 (_mm256_castsi256_si128 (a ), _mm256_extractf128_si256 (a , 1 ));
@@ -57,11 +74,20 @@ static inline int hsum_i32_8(const __m256i a) {
5774
5875EXPORT int vec_caps () {
5976 int cpuInfo [4 ] = {-1 };
60- // Calling __cpuid with 0x0 as the function_id argument
77+ // Calling CPUID function 0x0 as the function_id argument
6178 // gets the number of the highest valid function ID.
6279 cpuid (cpuInfo , 0 );
6380 int functionIds = cpuInfo [0 ];
81+ if (functionIds == 0 ) {
82+ // No CPUID functions
83+ return 0 ;
84+ }
85+ // call CPUID function 0x1 for feature flags
86+ cpuid (cpuInfo , 1 );
87+ int hasOsXsave = (cpuInfo [2 ] & (1 << 27 )) != 0 ;
88+ int avxEnabledInOS = hasOsXsave && ((xgetbv (0 ) & 6 ) == 6 );
6489 if (functionIds >= 7 ) {
90+ // call CPUID function 0x7 for AVX2/512 flags
6591 cpuid (cpuInfo , 7 );
6692 int ebx = cpuInfo [1 ];
6793 int ecx = cpuInfo [2 ];
@@ -72,10 +98,18 @@ EXPORT int vec_caps() {
7298 // int avx512_vnni = (ecx & 0x00000800) != 0;
7399 // if (avx512 && avx512_vnni) {
74100 if (avx512 ) {
75- return 2 ;
101+ if (avxEnabledInOS ) {
102+ return 2 ;
103+ } else {
104+ return -2 ;
105+ }
76106 }
77107 if (avx2 ) {
78- return 1 ;
108+ if (avxEnabledInOS ) {
109+ return 1 ;
110+ } else {
111+ return -1 ;
112+ }
79113 }
80114 }
81115 return 0 ;
0 commit comments