Skip to content

Commit 637e5ed

Browse files
committed
Update vec_caps to take into consideration OS-level disabling of AVX
1 parent 17c6e10 commit 637e5ed

File tree

3 files changed

+39
-5
lines changed

3 files changed

+39
-5
lines changed

libs/native/libraries/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ configurations {
1919
}
2020

2121
var zstdVersion = "1.5.5"
22-
var vecVersion = "1.0.10"
22+
var vecVersion = "1.0.11"
2323

2424
repositories {
2525
exclusiveContent {

libs/simdvec/native/publish_vec_binaries.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ if [ -z "$ARTIFACTORY_API_KEY" ]; then
2020
exit 1;
2121
fi
2222

23-
VERSION="1.0.10"
23+
VERSION="1.0.11"
2424
ARTIFACTORY_REPOSITORY="${ARTIFACTORY_REPOSITORY:-https://artifactory.elastic.dev/artifactory/elasticsearch-native/}"
2525
TEMP=$(mktemp -d)
2626

libs/simdvec/native/src/vec/c/amd64/vec.c

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,23 @@ static inline void cpuid(int output[4], int functionNumber) {
4646
#endif
4747
}
4848

49+
// Multi-platform XGETBV "intrinsic".
//
// Executes the XGETBV instruction for the extended control register selected
// by `ctr` (passed in ECX) and returns the 64-bit register value, assembled
// from EAX (low 32 bits) and EDX (high 32 bits).
//
// NOTE(review): XGETBV is only usable when the OS has enabled XSAVE
// (CPUID.1:ECX.OSXSAVE, bit 27); callers are expected to test that bit
// before calling this function.
static inline int64_t xgetbv(int ctr) {
#if defined(__GNUC__) || defined(__clang__)
    // use inline assembly, Gnu/AT&T syntax
    uint32_t a, d;
    __asm("xgetbv" : "=a"(a), "=d"(d) : "c"(ctr) : );
    // Use a C cast here: the function-style form `uint64_t(d)` is C++ only
    // and does not compile when this file is built as C.
    return (int64_t)((uint64_t)a | ((uint64_t)d << 32));

#elif (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200) // Microsoft or Intel compiler supporting _xgetbv intrinsic
    // intrinsic function for XGETBV
    return _xgetbv(ctr);

#else
#error Unsupported compiler
#endif
}
65+
4966
// Utility function to horizontally add 8 32-bit integers
5067
static inline int hsum_i32_8(const __m256i a) {
5168
const __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
@@ -57,11 +74,20 @@ static inline int hsum_i32_8(const __m256i a) {
5774

5875
EXPORT int vec_caps() {
5976
int cpuInfo[4] = {-1};
60-
// Calling __cpuid with 0x0 as the function_id argument
77+
// Calling CPUID with 0x0 as the function_id argument
6178
// gets the number of the highest valid function ID.
6279
cpuid(cpuInfo, 0);
6380
int functionIds = cpuInfo[0];
81+
if (functionIds == 0) {
82+
// No CPUID functions
83+
return 0;
84+
}
85+
// call CPUID function 0x1 for feature flags
86+
cpuid(cpuInfo, 1);
87+
int hasOsXsave = (cpuInfo[2] & (1 << 27)) != 0;
88+
int avxEnabledInOS = hasOsXsave && ((xgetbv(0) & 6) == 6);
6489
if (functionIds >= 7) {
90+
// call CPUID function 0x7 for AVX2/512 flags
6591
cpuid(cpuInfo, 7);
6692
int ebx = cpuInfo[1];
6793
int ecx = cpuInfo[2];
@@ -72,10 +98,18 @@ EXPORT int vec_caps() {
7298
// int avx512_vnni = (ecx & 0x00000800) != 0;
7399
// if (avx512 && avx512_vnni) {
74100
if (avx512) {
75-
return 2;
101+
if (avxEnabledInOS) {
102+
return 2;
103+
} else {
104+
return -2;
105+
}
76106
}
77107
if (avx2) {
78-
return 1;
108+
if (avxEnabledInOS) {
109+
return 1;
110+
} else {
111+
return -1;
112+
}
79113
}
80114
}
81115
return 0;

0 commit comments

Comments
 (0)