diff --git a/docs/changelog/126911.yaml b/docs/changelog/126911.yaml new file mode 100644 index 0000000000000..040d1dff767bf --- /dev/null +++ b/docs/changelog/126911.yaml @@ -0,0 +1,6 @@ +pr: 126911 +summary: Fix `vec_caps` to test for OS support too (on x64) +area: Vector Search +type: bug +issues: + - 126809 diff --git a/libs/native/libraries/build.gradle b/libs/native/libraries/build.gradle index ed37d4a70931f..58562ddcd6882 100644 --- a/libs/native/libraries/build.gradle +++ b/libs/native/libraries/build.gradle @@ -19,7 +19,7 @@ configurations { } var zstdVersion = "1.5.5" -var vecVersion = "1.0.10" +var vecVersion = "1.0.11" repositories { exclusiveContent { diff --git a/libs/native/src/main/java/org/elasticsearch/nativeaccess/jdk/JdkVectorLibrary.java b/libs/native/src/main/java/org/elasticsearch/nativeaccess/jdk/JdkVectorLibrary.java index 3277cb8f8e6c7..f1734055cda4a 100644 --- a/libs/native/src/main/java/org/elasticsearch/nativeaccess/jdk/JdkVectorLibrary.java +++ b/libs/native/src/main/java/org/elasticsearch/nativeaccess/jdk/JdkVectorLibrary.java @@ -41,7 +41,7 @@ public final class JdkVectorLibrary implements VectorLibrary { try { int caps = (int) vecCaps$mh.invokeExact(); logger.info("vec_caps=" + caps); - if (caps != 0) { + if (caps > 0) { if (caps == 2) { dot7u$mh = downcallHandle( "dot7u_2", @@ -67,6 +67,11 @@ public final class JdkVectorLibrary implements VectorLibrary { } INSTANCE = new JdkVectorSimilarityFunctions(); } else { + if (caps < 0) { + logger.warn(""" + Your CPU supports vector capabilities, but they are disabled at OS level. For optimal performance, \ + enable them in your OS/Hypervisor/VM/container"""); + } dot7u$mh = null; sqr7u$mh = null; INSTANCE = null; diff --git a/libs/simdvec/native/build.gradle b/libs/simdvec/native/build.gradle index 848d3b325a03f..2f9e61c826bd7 100644 --- a/libs/simdvec/native/build.gradle +++ b/libs/simdvec/native/build.gradle @@ -11,7 +11,7 @@ apply plugin: 'cpp' var os = org.gradle.internal.os.OperatingSystem.current() -// To update this library run publish_vec_binaries.sh ( or ./gradlew vecSharedLibrary ) +// To update this library run publish_vec_binaries.sh ( or ./gradlew buildSharedLibrary ) // Or // For local development, build the docker image with: // docker build --platform linux/arm64 --progress=plain --file=Dockerfile.aarch64 . (for aarch64) diff --git a/libs/simdvec/native/publish_vec_binaries.sh b/libs/simdvec/native/publish_vec_binaries.sh index db2e6e877a032..e3d7e4858ecfc 100755 --- a/libs/simdvec/native/publish_vec_binaries.sh +++ b/libs/simdvec/native/publish_vec_binaries.sh @@ -20,7 +20,7 @@ if [ -z "$ARTIFACTORY_API_KEY" ]; then exit 1; fi -VERSION="1.0.10" +VERSION="1.0.11" ARTIFACTORY_REPOSITORY="${ARTIFACTORY_REPOSITORY:-https://artifactory.elastic.dev/artifactory/elasticsearch-native/}" TEMP=$(mktemp -d) diff --git a/libs/simdvec/native/src/vec/c/amd64/vec.c b/libs/simdvec/native/src/vec/c/amd64/vec.c index 50eee12ff69b6..f63a7649b1390 100644 --- a/libs/simdvec/native/src/vec/c/amd64/vec.c +++ b/libs/simdvec/native/src/vec/c/amd64/vec.c @@ -46,6 +46,23 @@ static inline void cpuid(int output[4], int functionNumber) { #endif } +// Multi-platform XGETBV "intrinsic" +static inline int64_t xgetbv(int ctr) { +#if defined(__GNUC__) || defined(__clang__) + // use inline assembly, Gnu/AT&T syntax + uint32_t a, d; + __asm("xgetbv" : "=a"(a),"=d"(d) : "c"(ctr) : ); + return a | (((uint64_t) d) << 32); + +#elif (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200) + // Microsoft or Intel compiler supporting _xgetbv intrinsic + return _xgetbv(ctr); + +#else + #error Unsupported compiler +#endif +} + // Utility function to horizontally add 8 32-bit integers static inline int hsum_i32_8(const __m256i a) { const __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1)); @@ -57,11 +74,20 @@ static inline int hsum_i32_8(const __m256i a) { EXPORT int vec_caps() { int cpuInfo[4] = {-1}; - // Calling __cpuid with 0x0 as the function_id argument + // Calling CPUID function 0x0 as the function_id argument // gets the number of the highest valid function ID. cpuid(cpuInfo, 0); int functionIds = cpuInfo[0]; + if (functionIds == 0) { + // No CPUID functions + return 0; + } + // call CPUID function 0x1 for feature flags + cpuid(cpuInfo, 1); + int hasOsXsave = (cpuInfo[2] & (1 << 27)) != 0; + int avxEnabledInOS = hasOsXsave && ((xgetbv(0) & 6) == 6); if (functionIds >= 7) { + // call CPUID function 0x7 for AVX2/512 flags cpuid(cpuInfo, 7); int ebx = cpuInfo[1]; int ecx = cpuInfo[2]; @@ -72,10 +98,18 @@ EXPORT int vec_caps() { // int avx512_vnni = (ecx & 0x00000800) != 0; // if (avx512 && avx512_vnni) { if (avx512) { - return 2; + if (avxEnabledInOS) { + return 2; + } else { + return -2; + } } if (avx2) { - return 1; + if (avxEnabledInOS) { + return 1; + } else { + return -1; + } } } return 0;