From c929d57306b2e0967147122ba2d8e0c0bcc4d94d Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Wed, 5 Nov 2025 16:17:15 -0500 Subject: [PATCH 01/14] [gpu] wip - updates for configuring/updating cuvs-java and libcuvs --- .buildkite/scripts/cuvs-snapshot/configure.sh | 35 +++++++---------- .../cuvs-snapshot/current-snapshot-version | 1 - .../remove-verification-metadata.py | 13 +++++++ .../update-current-snapshot-version.sh | 38 +++++++------------ build-tools-internal/version.properties | 1 + x-pack/plugin/gpu/build.gradle | 2 +- 6 files changed, 42 insertions(+), 48 deletions(-) delete mode 100644 .buildkite/scripts/cuvs-snapshot/current-snapshot-version create mode 100644 .buildkite/scripts/cuvs-snapshot/remove-verification-metadata.py diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh index 241d5f78900e0..ca06c4da04334 100755 --- a/.buildkite/scripts/cuvs-snapshot/configure.sh +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -1,7 +1,5 @@ #!/bin/bash -set -euo pipefail - if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then export JAVA_HOME="$HOME/.java/openjdk24" export PATH="$JAVA_HOME/bin:$PATH" @@ -16,28 +14,23 @@ fi # No idea why... nvidia-smi -CURRENT_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -ELASTICSEARCH_REPO_DIR="$(cd "$CURRENT_SCRIPT_DIR/../../.." && pwd)" - -CUVS_SNAPSHOT_VERSION="${CUVS_SNAPSHOT_VERSION:-$(cat "$CURRENT_SCRIPT_DIR"/current-snapshot-version)}" -CUVS_ARCHIVE="cuvs-$CUVS_SNAPSHOT_VERSION.tar.gz" -CUVS_URL="https://storage.googleapis.com/elasticsearch-cuvs-snapshots/$CUVS_ARCHIVE" +LIBCUVS_GCS_BUCKET="elasticsearch-cuvs-snapshots" -CUVS_WORKSPACE=${CUVS_WORKSPACE:-$(cd "$(mktemp -d)")} -CUVS_DIR="$(pwd)/cuvs-$CUVS_SNAPSHOT_VERSION" +LIBCUVS_DIR="$HOME/libcuvs" +mkdir -p "$LIBCUVS_DIR" -curl -O "$CUVS_URL" -tar -xzf "$CUVS_ARCHIVE" +CUVS_VERSION=$(grep 'cuvs_java' build-tools-internal/version.properties | awk '{print $3}') -CUVS_VERSION=$(cd "$CUVS_DIR/cuvs-java/target" && mvn help:evaluate -Dexpression=project.version -q -DforceStdout) +LIBCUVS_VERSION_DIR="$LIBCUVS_DIR/$CUVS_VERSION" -LD_LIBRARY_PATH=$(echo "$LD_LIBRARY_PATH" | tr ':' '\n' | grep -v "libcuvs/linux-x64" | tr '\n' ':' | sed 's/:$//') -LD_LIBRARY_PATH="$CUVS_DIR/libcuvs/linux-x64:$LD_LIBRARY_PATH" -export LD_LIBRARY_PATH +if [[ ! -d "$LIBCUVS_VERSION_DIR" ]]; then + cd "$LIBCUVS_DIR" + CUVS_ARCHIVE="libcuvs-$CUVS_VERSION.tar.gz" + curl -O "https://storage.googleapis.com/$LIBCUVS_GCS_BUCKET/$CUVS_ARCHIVE" + tar -xzf "$CUVS_ARCHIVE" + rm -f "$CUVS_ARCHIVE" -cd "$CUVS_DIR/cuvs-java/target" -mvn install:install-file -Dfile="cuvs-java-$CUVS_VERSION.jar" -DartifactId=elastic-cuvs-java -DgeneratePom=true + cd - +fi -cd "$ELASTICSEARCH_REPO_DIR" -PLUGIN_GRADLE_FILE=x-pack/plugin/gpu/build.gradle -sed -i "s|implementation 'com.nvidia.cuvs:elastic-cuvs-java:.*'|implementation 'com.nvidia.cuvs:elastic-cuvs-java:$CUVS_VERSION'|" "$PLUGIN_GRADLE_FILE" +export LD_LIBRARY_PATH="$LIBCUVS_VERSION_DIR:$LD_LIBRARY_PATH" diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version deleted file mode 100644 index 3bb6b7db4687c..0000000000000 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ /dev/null @@ -1 +0,0 @@ -fdb8bfb8 diff --git a/.buildkite/scripts/cuvs-snapshot/remove-verification-metadata.py b/.buildkite/scripts/cuvs-snapshot/remove-verification-metadata.py new file mode 100644 index 0000000000000..01d9fd4e3d867 --- /dev/null +++ b/.buildkite/scripts/cuvs-snapshot/remove-verification-metadata.py @@ -0,0 +1,13 @@ +import re + +regex = re.compile( + r'` string in version.properties and maintain the same indentation +sed -E "s/^(cuvs_java *= *[^ ]* *).*\$/\1$CUVS_JAVA_VERSION/" build-tools-internal/version.properties > new-version.properties +mv new-version.properties build-tools-internal/version.properties -if [[ -z "${CUVS_SNAPSHOT_VERSION:-}" ]]; then - echo "CUVS_SNAPSHOT_VERSION not set. Set this to update the current snapshot version." - exit 1 -fi +python3 .buildkite/scripts/lucene-snapshot/remove-verification-metadata.py +./gradlew --write-verification-metadata sha256 -if [[ "$CUVS_SNAPSHOT_VERSION" == "$(cat $SNAPSHOT_VERSION_FILE)" ]]; then - echo "Current snapshot version already set to '$CUVS_SNAPSHOT_VERSION'. No need to update." +if git diff-index --quiet HEAD --; then + echo 'No changes to commit.' exit 0 fi -echo "--- Configuring libcuvs/cuvs-java" -source .buildkite/scripts/cuvs-snapshot/configure.sh - -if [[ "${SKIP_TESTING:-}" != "true" ]]; then - echo "--- Testing snapshot before updating" - ./gradlew -Druntime.java=24 :x-pack:plugin:gpu:yamlRestTest -S -fi - -echo "--- Updating snapshot" - -echo "$CUVS_SNAPSHOT_VERSION" > "$SNAPSHOT_VERSION_FILE" +git config --global user.name elasticsearchmachine +git config --global user.email 'infra-root+elasticsearchmachine@elastic.co' -CURRENT_SHA="$(gh api "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE?ref=$BRANCH_TO_UPDATE" | jq -r .sha)" || true +git add build-tools-internal/version.properties +git add gradle/verification-metadata.xml -gh api -X PUT "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE" \ - -f branch="$BRANCH_TO_UPDATE" \ - -f message="Update cuvs snapshot version to $CUVS_VERSION" \ - -f content="$(base64 -w 0 "$WORKSPACE/$SNAPSHOT_VERSION_FILE")" \ - -f sha="$CURRENT_SHA" +git commit -m "[Automated] Update cuvs-java to $CUVS_JAVA_VERSION" +git push origin "$BUILDKITE_BRANCH" diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 24f0cca082b2d..77dabc6803302 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -19,6 +19,7 @@ commons_lang3 = 3.9 google_oauth_client = 1.34.1 awsv2sdk = 2.31.78 reactive_streams = 1.0.4 +cuvs_java = 25.10.0 antlr4 = 4.13.1 # bouncy castle version for non-fips. fips jars use a different version diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle index 2f5034ee11b9c..8ed4123b6349b 100644 --- a/x-pack/plugin/gpu/build.gradle +++ b/x-pack/plugin/gpu/build.gradle @@ -16,7 +16,7 @@ base { dependencies { compileOnly project(path: xpackModule('core')) compileOnly project(':server') - implementation('com.nvidia.cuvs:cuvs-java:25.10.0') + implementation("com.nvidia.cuvs:cuvs-java:${versions.cuvs_java}") testImplementation(testArtifact(project(xpackModule('core')))) testImplementation(testArtifact(project(':server'))) yamlRestTestImplementation(project(xpackModule('gpu'))) From 5510ba3eb88cae499e8f7fc40106ee3d6f431da3 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Wed, 5 Nov 2025 16:35:10 -0500 Subject: [PATCH 02/14] Try a different zone --- .buildkite/pipelines/cuvs-snapshot/update-snapshot.yml | 2 +- .buildkite/scripts/cuvs-snapshot/configure.sh | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml index 6011926d782bb..b35602104507a 100644 --- a/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml +++ b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml @@ -5,7 +5,7 @@ steps: provider: gcp image: family/elasticsearch-ubuntu-2404-nvidia machineType: g2-standard-16 - zones: us-central1-b,us-central1-c + zones: northamerica-northeast2-a diskSizeGb: 150 - wait: ~ - trigger: "elasticsearch-cuvs-run-tests" diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh index ca06c4da04334..95b091474184e 100755 --- a/.buildkite/scripts/cuvs-snapshot/configure.sh +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -8,11 +8,11 @@ if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}" source /etc/profile.d/elastic-nvidia.sh -fi -# Not running this before the tests results in an error when running the tests -# No idea why... -nvidia-smi + # Not running this before the tests results in an error when running the tests + # No idea why... + nvidia-smi +fi LIBCUVS_GCS_BUCKET="elasticsearch-cuvs-snapshots" @@ -33,4 +33,4 @@ if [[ ! -d "$LIBCUVS_VERSION_DIR" ]]; then cd - fi -export LD_LIBRARY_PATH="$LIBCUVS_VERSION_DIR:$LD_LIBRARY_PATH" +export LD_LIBRARY_PATH="$LIBCUVS_VERSION_DIR:${LD_LIBRARY_PATH:-}" From a7747b5e16297dddfb4272f805312a65a3919b70 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Wed, 5 Nov 2025 16:47:02 -0500 Subject: [PATCH 03/14] checkout branch first --- .../cuvs-snapshot/update-current-snapshot-version.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh index 29fecf091d7c9..a51340b933bc4 100755 --- a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh +++ b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh @@ -2,6 +2,11 @@ set -euo pipefail +echo "--- Updating cuvs-java version" + +git checkout "$BUILDKITE_BRANCH" +git pull --ff-only origin "$BUILDKITE_BRANCH" + # Replace `cuvs_java = ` string in version.properties and maintain the same indentation sed -E "s/^(cuvs_java *= *[^ ]* *).*\$/\1$CUVS_JAVA_VERSION/" build-tools-internal/version.properties > new-version.properties mv new-version.properties build-tools-internal/version.properties @@ -9,11 +14,18 @@ mv new-version.properties build-tools-internal/version.properties python3 .buildkite/scripts/lucene-snapshot/remove-verification-metadata.py ./gradlew --write-verification-metadata sha256 +if [[ "${SKIP_TESTING:-}" != "true" ]]; then + echo "--- Testing cuvs-java before committing" + ./gradlew -Druntime.java=24 :x-pack:plugin:gpu:yamlRestTest -S +fi + if git diff-index --quiet HEAD --; then echo 'No changes to commit.' exit 0 fi +echo "--- Committing changes" + git config --global user.name elasticsearchmachine git config --global user.email 'infra-root+elasticsearchmachine@elastic.co' From a1302b69fc16f83739efd105c939809956758509 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 5 Nov 2025 21:55:08 +0000 Subject: [PATCH 04/14] [Automated] Update cuvs-java to 25.12.0-57ad98bd-SNAPSHOT --- build-tools-internal/version.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 77dabc6803302..e7679d8268ea6 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -19,7 +19,7 @@ commons_lang3 = 3.9 google_oauth_client = 1.34.1 awsv2sdk = 2.31.78 reactive_streams = 1.0.4 -cuvs_java = 25.10.0 +cuvs_java = 25.12.0-57ad98bd-SNAPSHOT antlr4 = 4.13.1 # bouncy castle version for non-fips. fips jars use a different version From 2cb7a1a9643befe9a5cf2aae994dc8ec98a6f063 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Thu, 6 Nov 2025 10:53:57 -0500 Subject: [PATCH 05/14] Fix path, change zone --- .buildkite/pipelines/cuvs-snapshot/run-tests.yml | 2 +- .buildkite/scripts/cuvs-snapshot/configure.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/pipelines/cuvs-snapshot/run-tests.yml b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml index fe4e2f8cefd70..08ca11d0f6294 100644 --- a/.buildkite/pipelines/cuvs-snapshot/run-tests.yml +++ b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml @@ -7,7 +7,7 @@ steps: image: family/elasticsearch-ubuntu-2404-nvidia machineType: g2-standard-32 buildDirectory: /dev/shm/bk - zones: us-central1-b,us-central1-c + zones: northamerica-northeast2-a env: GRADLE_TASK: "{{matrix.GRADLE_TASK}}" matrix: diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh index 95b091474184e..51137c3d400c4 100755 --- a/.buildkite/scripts/cuvs-snapshot/configure.sh +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -26,7 +26,7 @@ LIBCUVS_VERSION_DIR="$LIBCUVS_DIR/$CUVS_VERSION" if [[ ! -d "$LIBCUVS_VERSION_DIR" ]]; then cd "$LIBCUVS_DIR" CUVS_ARCHIVE="libcuvs-$CUVS_VERSION.tar.gz" - curl -O "https://storage.googleapis.com/$LIBCUVS_GCS_BUCKET/$CUVS_ARCHIVE" + curl -fO "https://storage.googleapis.com/$LIBCUVS_GCS_BUCKET/libcuvs/$CUVS_ARCHIVE" tar -xzf "$CUVS_ARCHIVE" rm -f "$CUVS_ARCHIVE" From 0cfbcc53b9bc6f3ecb429dd3c430d98579d61f2e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 6 Nov 2025 19:13:52 +0000 Subject: [PATCH 06/14] [Automated] Update cuvs-java to 25.12.0-eb4647ee-SNAPSHOT --- build-tools-internal/version.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index e7679d8268ea6..12d786df06906 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -19,7 +19,7 @@ commons_lang3 = 3.9 google_oauth_client = 1.34.1 awsv2sdk = 2.31.78 reactive_streams = 1.0.4 -cuvs_java = 25.12.0-57ad98bd-SNAPSHOT +cuvs_java = 25.12.0-eb4647ee-SNAPSHOT antlr4 = 4.13.1 # bouncy castle version for non-fips. fips jars use a different version From 35cc4107b89f61da283040220f02df1aee74dd52 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Thu, 6 Nov 2025 14:23:23 -0500 Subject: [PATCH 07/14] Support for cuvs-snapshot repo --- .../gradle/internal/RepositoriesSetupPlugin.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RepositoriesSetupPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RepositoriesSetupPlugin.java index d559d955e63db..c71aa1fed3163 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RepositoriesSetupPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RepositoriesSetupPlugin.java @@ -22,6 +22,7 @@ public class RepositoriesSetupPlugin implements Plugin { private static final Pattern LUCENE_SNAPSHOT_REGEX = Pattern.compile("\\w+-snapshot-([a-z0-9]+)"); + private static final Pattern CUVS_SNAPSHOT_REGEX = Pattern.compile("([0-9\\.]+-[a-z0-9]+)-SNAPSHOT"); @Override public void apply(Project project) { @@ -60,5 +61,19 @@ public static void configureRepositories(Project project) { exclusiveRepo.forRepositories(luceneRepo); }); } + + String cuvsVersion = VersionProperties.getVersions().get("cuvs_java"); + if (cuvsVersion.contains("-SNAPSHOT")) { + MavenArtifactRepository cuvsRepo = repos.maven(repo -> { + repo.setName("cuvs-snapshots"); + repo.setUrl("https://storage.googleapis.com/elasticsearch-cuvs-snapshots"); + }); + repos.exclusiveContent(exclusiveRepo -> { + exclusiveRepo.filter( + descriptor -> descriptor.includeVersionByRegex("com\\.nvidia\\.cuvs", ".*", ".*-SNAPSHOT") + ); + exclusiveRepo.forRepositories(cuvsRepo); + }); + } } } From 13fd95576c17949c0923c7f946f43321404b2aea Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Thu, 6 Nov 2025 15:04:18 -0500 Subject: [PATCH 08/14] Use 25.10.0 before merging into main for now --- build-tools-internal/version.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 12d786df06906..77dabc6803302 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -19,7 +19,7 @@ commons_lang3 = 3.9 google_oauth_client = 1.34.1 awsv2sdk = 2.31.78 reactive_streams = 1.0.4 -cuvs_java = 25.12.0-eb4647ee-SNAPSHOT +cuvs_java = 25.10.0 antlr4 = 4.13.1 # bouncy castle version for non-fips. fips jars use a different version From 9d065aeb362b71722f9b17defbc399282a2f3669 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 6 Nov 2025 20:22:29 +0000 Subject: [PATCH 09/14] [CI] Auto commit changes from spotless --- .../gradle/internal/RepositoriesSetupPlugin.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RepositoriesSetupPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RepositoriesSetupPlugin.java index c71aa1fed3163..21e4dfd3a7799 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RepositoriesSetupPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RepositoriesSetupPlugin.java @@ -69,9 +69,7 @@ public static void configureRepositories(Project project) { repo.setUrl("https://storage.googleapis.com/elasticsearch-cuvs-snapshots"); }); repos.exclusiveContent(exclusiveRepo -> { - exclusiveRepo.filter( - descriptor -> descriptor.includeVersionByRegex("com\\.nvidia\\.cuvs", ".*", ".*-SNAPSHOT") - ); + exclusiveRepo.filter(descriptor -> descriptor.includeVersionByRegex("com\\.nvidia\\.cuvs", ".*", ".*-SNAPSHOT")); exclusiveRepo.forRepositories(cuvsRepo); }); } From f74e1f237e487c0a080971a1bd0ddfb8026b47cc Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Wed, 12 Nov 2025 12:20:39 -0500 Subject: [PATCH 10/14] Try different zone for PR steps --- .buildkite/pipelines/pull-request/gpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipelines/pull-request/gpu.yml b/.buildkite/pipelines/pull-request/gpu.yml index 40afa272ead7c..e1936c466fbfd 100644 --- a/.buildkite/pipelines/pull-request/gpu.yml +++ b/.buildkite/pipelines/pull-request/gpu.yml @@ -14,7 +14,7 @@ steps: image: family/elasticsearch-ubuntu-2404-nvidia machineType: g2-standard-32 buildDirectory: /dev/shm/bk - zones: us-central1-b,us-central1-c + zones: northamerica-northeast2-a env: GRADLE_TASK: "{{matrix.GRADLE_TASK}}" matrix: From c832187b3ae51f2c7a51a8fe7aa0eb307bc30714 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Fri, 21 Nov 2025 12:57:45 -0500 Subject: [PATCH 11/14] Some directory updates to help with esbench image --- .buildkite/scripts/cuvs-snapshot/configure.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh index 51137c3d400c4..7a6bc0dc913e7 100755 --- a/.buildkite/scripts/cuvs-snapshot/configure.sh +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -16,20 +16,24 @@ fi LIBCUVS_GCS_BUCKET="elasticsearch-cuvs-snapshots" -LIBCUVS_DIR="$HOME/libcuvs" +LIBCUVS_DIR="/opt/libcuvs" mkdir -p "$LIBCUVS_DIR" +chmod 777 "$LIBCUVS_DIR" CUVS_VERSION=$(grep 'cuvs_java' build-tools-internal/version.properties | awk '{print $3}') LIBCUVS_VERSION_DIR="$LIBCUVS_DIR/$CUVS_VERSION" if [[ ! -d "$LIBCUVS_VERSION_DIR" ]]; then - cd "$LIBCUVS_DIR" + mkdir -p $LIBCUVS_VERSION_DIR + cd "$LIBCUVS_VERSION_DIR" CUVS_ARCHIVE="libcuvs-$CUVS_VERSION.tar.gz" curl -fO "https://storage.googleapis.com/$LIBCUVS_GCS_BUCKET/libcuvs/$CUVS_ARCHIVE" tar -xzf "$CUVS_ARCHIVE" rm -f "$CUVS_ARCHIVE" - + if [[ -d "$CUVS_VERSION" ]]; then + mv "$CUVS_VERSION/*" ./ + fi cd - fi From ba5c9ca47071cf2a9f17531f171743b50f735a6e Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Fri, 21 Nov 2025 14:22:43 -0500 Subject: [PATCH 12/14] Fix version problem after merge --- libs/gpu-codec/build.gradle | 2 +- x-pack/plugin/gpu/build.gradle | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/libs/gpu-codec/build.gradle b/libs/gpu-codec/build.gradle index 66a6c373c8b6c..9ecef5311046e 100644 --- a/libs/gpu-codec/build.gradle +++ b/libs/gpu-codec/build.gradle @@ -16,7 +16,7 @@ dependencies { api project(':libs:logging') compileOnly "org.apache.lucene:lucene-core:${versions.lucene}" compileOnly project(':server') - implementation('com.nvidia.cuvs:cuvs-java:25.10.0') + implementation("com.nvidia.cuvs:cuvs-java:${versions.cuvs_java}") testImplementation(project(":test:framework")) { exclude group: 'org.elasticsearch', module: 'gpu-codec' diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle index 8a0cd619fc59e..af3d266b9f17e 100644 --- a/x-pack/plugin/gpu/build.gradle +++ b/x-pack/plugin/gpu/build.gradle @@ -15,13 +15,12 @@ base { dependencies { compileOnly project(path: xpackModule('core')) compileOnly project(':server') - implementation("com.nvidia.cuvs:cuvs-java:${versions.cuvs_java}") implementation project(':libs:gpu-codec') testImplementation(testArtifact(project(xpackModule('core')))) testImplementation(testArtifact(project(':server'))) yamlRestTestImplementation project(':libs:gpu-codec') - internalClusterTestImplementation('com.nvidia.cuvs:cuvs-java:25.10.0') + internalClusterTestImplementation("com.nvidia.cuvs:cuvs-java:${versions.cuvs_java}") clusterModules project(xpackModule('gpu')) } From a710c4e210d8cdcd6fbe5a73ef2a9c58a1c3802f Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Tue, 25 Nov 2025 16:21:03 -0500 Subject: [PATCH 13/14] Only call nvidia-smi during CI (not packer) --- .buildkite/scripts/cuvs-snapshot/configure.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh index 7a6bc0dc913e7..7a8d174f61cc6 100755 --- a/.buildkite/scripts/cuvs-snapshot/configure.sh +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -11,7 +11,9 @@ if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then # Not running this before the tests results in an error when running the tests # No idea why... - nvidia-smi + if [[ "${BUILDKITE:-}" != "" && "${CI:-}" == "true" ]]; then + nvidia-smi + fi fi LIBCUVS_GCS_BUCKET="elasticsearch-cuvs-snapshots" From 88bd7a4de52b31cf9a1d151eee7eebde48c452e3 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Wed, 26 Nov 2025 14:57:42 -0500 Subject: [PATCH 14/14] [CI] Use AWS for GPU instances --- .buildkite/pipelines/cuvs-snapshot/run-tests.yml | 11 ++++++----- .../pipelines/cuvs-snapshot/update-snapshot.yml | 11 ++++++----- .buildkite/pipelines/pull-request/gpu.yml | 11 ++++++----- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/.buildkite/pipelines/cuvs-snapshot/run-tests.yml b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml index fe4e2f8cefd70..5eff1ca66adfb 100644 --- a/.buildkite/pipelines/cuvs-snapshot/run-tests.yml +++ b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml @@ -3,11 +3,12 @@ steps: command: .buildkite/scripts/cuvs-snapshot/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints {{matrix.GRADLE_TASK}} timeout_in_minutes: 300 agents: - provider: gcp - image: family/elasticsearch-ubuntu-2404-nvidia - machineType: g2-standard-32 - buildDirectory: /dev/shm/bk - zones: us-central1-b,us-central1-c + provider: aws + imagePrefix: elasticsearch-aws-ubuntu-2204-nvidia + instanceType: g6.8xlarge + diskSizeGb: 350 + diskType: gp3 + diskName: /dev/sda1 env: GRADLE_TASK: "{{matrix.GRADLE_TASK}}" matrix: diff --git a/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml index 6011926d782bb..3f381588885c3 100644 --- a/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml +++ b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml @@ -2,11 +2,12 @@ steps: - label: "Smoke test and update new cuVS snapshot" command: .buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh agents: - provider: gcp - image: family/elasticsearch-ubuntu-2404-nvidia - machineType: g2-standard-16 - zones: us-central1-b,us-central1-c - diskSizeGb: 150 + provider: aws + imagePrefix: elasticsearch-aws-ubuntu-2204-nvidia + instanceType: g6.2xlarge + diskSizeGb: 350 + diskType: gp3 + diskName: /dev/sda1 - wait: ~ - trigger: "elasticsearch-cuvs-run-tests" build: diff --git a/.buildkite/pipelines/pull-request/gpu.yml b/.buildkite/pipelines/pull-request/gpu.yml index 40afa272ead7c..3868c5085da11 100644 --- a/.buildkite/pipelines/pull-request/gpu.yml +++ b/.buildkite/pipelines/pull-request/gpu.yml @@ -10,11 +10,12 @@ steps: command: .buildkite/scripts/cuvs-snapshot/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints {{matrix.GRADLE_TASK}} timeout_in_minutes: 300 agents: - provider: gcp - image: family/elasticsearch-ubuntu-2404-nvidia - machineType: g2-standard-32 - buildDirectory: /dev/shm/bk - zones: us-central1-b,us-central1-c + provider: aws + imagePrefix: elasticsearch-aws-ubuntu-2204-nvidia + instanceType: g6.8xlarge + diskSizeGb: 350 + diskType: gp3 + diskName: /dev/sda1 env: GRADLE_TASK: "{{matrix.GRADLE_TASK}}" matrix: