diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 689560bb17d38..e2c2f016aa34f 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -15,6 +15,10 @@
+
+
+
+
@@ -1130,7 +1134,7 @@
-
+
diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle
index 1ab800cfac4e0..0c4ce21d0fad0 100644
--- a/x-pack/plugin/gpu/build.gradle
+++ b/x-pack/plugin/gpu/build.gradle
@@ -20,14 +20,12 @@ repositories {
dependencies {
compileOnly project(path: xpackModule('core'))
compileOnly project(':server')
- implementation 'com.nvidia.cuvs:cuvs-java:25.08.0'
+ implementation 'com.nvidia.cuvs:cuvs-java:25.10.0'
testImplementation(testArtifact(project(xpackModule('core'))))
testImplementation(testArtifact(project(':server')))
clusterModules project(xpackModule('gpu'))
}
-tasks.named("yamlRestTest") {
- usesDefaultDistribution("uses gpu plugin")
-}
+
artifacts {
restXpackTests(new File(projectDir, "src/yamlRestTest/resources/rest-api-spec/test"))
}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java
index 67fd97faec259..f1ff6bcffd1d2 100644
--- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java
@@ -46,7 +46,7 @@ public static CuVSResources cuVSResourcesOrNull(boolean logError) {
if (t instanceof ExceptionInInitializerError ex) {
t = ex.getCause();
}
- LOG.warn("Exception occurred during creation of cuvs resources. " + t);
+ LOG.warn("Exception occurred during creation of cuvs resources", t);
}
}
return null;
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java
index 0f90ab4c5bb75..bdc598e876931 100644
--- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java
@@ -7,7 +7,7 @@
package org.elasticsearch.xpack.gpu.codec;
-import com.nvidia.cuvs.Dataset;
+import com.nvidia.cuvs.CuVSMatrix;
import org.apache.lucene.store.MemorySegmentAccessInput;
@@ -20,6 +20,5 @@ static DatasetUtils getInstance() {
}
/** Returns a Dataset over the float32 vectors in the input. */
- Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException;
-
+ CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException;
}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java
index 3d6d33028ab8f..0cca7ce5cd7cb 100644
--- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java
@@ -7,12 +7,10 @@
package org.elasticsearch.xpack.gpu.codec;
-import com.nvidia.cuvs.Dataset;
+import com.nvidia.cuvs.CuVSMatrix;
import org.apache.lucene.store.MemorySegmentAccessInput;
-import java.io.IOException;
-
/** Stubb holder - never executed. */
public class DatasetUtilsImpl implements DatasetUtils {
@@ -21,7 +19,7 @@ static DatasetUtils getInstance() {
}
@Override
- public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
+ public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) {
throw new UnsupportedOperationException("should not reach here");
}
}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java
index 163e2277137ed..73e11dd24f4d5 100644
--- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java
@@ -9,7 +9,7 @@
import com.nvidia.cuvs.CagraIndex;
import com.nvidia.cuvs.CagraIndexParams;
-import com.nvidia.cuvs.Dataset;
+import com.nvidia.cuvs.CuVSMatrix;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.KnnFieldVectorsWriter;
@@ -35,7 +35,6 @@
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
-import org.elasticsearch.common.lucene.store.IndexOutputOutputStream;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.logging.LogManager;
@@ -177,21 +176,21 @@ public long ramBytesUsed() {
}
private static final class DatasetOrVectors {
- private final Dataset dataset;
+ private final CuVSMatrix dataset;
private final float[][] vectors;
static DatasetOrVectors fromArray(float[][] vectors) {
return new DatasetOrVectors(
- vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : Dataset.ofArray(vectors),
+ vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : CuVSMatrix.ofArray(vectors),
vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? vectors : null
);
}
- static DatasetOrVectors fromDataset(Dataset dataset) {
+ static DatasetOrVectors fromDataset(CuVSMatrix dataset) {
return new DatasetOrVectors(dataset, null);
}
- private DatasetOrVectors(Dataset dataset, float[][] vectors) {
+ private DatasetOrVectors(CuVSMatrix dataset, float[][] vectors) {
this.dataset = dataset;
this.vectors = vectors;
validateState();
@@ -204,10 +203,10 @@ private void validateState() {
}
int size() {
- return dataset != null ? dataset.size() : vectors.length;
+ return dataset != null ? (int) dataset.size() : vectors.length;
}
- Dataset getDataset() {
+ CuVSMatrix getDataset() {
return dataset;
}
@@ -243,9 +242,16 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV
}
mockGraph = writeGraph(vectors, graphLevelNodeOffsets);
} else {
- String tempCagraHNSWFileName = buildGPUIndex(fieldInfo.getVectorSimilarityFunction(), datasetOrVectors.dataset);
- assert tempCagraHNSWFileName != null : "GPU index should be built for field: " + fieldInfo.name;
- mockGraph = writeGraph(tempCagraHNSWFileName, graphLevelNodeOffsets);
+ var dataset = datasetOrVectors.dataset;
+ var cuVSResources = cuVSResourceManager.acquire((int) dataset.size(), (int) dataset.columns());
+ try {
+ try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) {
+ assert index != null : "GPU index should be built for field: " + fieldInfo.name;
+ mockGraph = writeGraph(index.getGraph(), graphLevelNodeOffsets);
+ }
+ } finally {
+ cuVSResourceManager.release(cuVSResources);
+ }
}
long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset;
writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetOrVectors.size(), mockGraph, graphLevelNodeOffsets);
@@ -256,8 +262,11 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV
}
}
- @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)")
- private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Dataset dataset) throws Throwable {
+ private CagraIndex buildGPUIndex(
+ CuVSResourceManager.ManagedCuVSResources cuVSResources,
+ VectorSimilarityFunction similarityFunction,
+ CuVSMatrix dataset
+ ) throws Throwable {
CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) {
case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded;
case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct;
@@ -271,134 +280,50 @@ private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Datase
.withMetric(distanceType)
.build();
- var cuVSResources = cuVSResourceManager.acquire(dataset.size(), dataset.dimensions());
- try {
- long startTime = System.nanoTime();
- var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params);
- var index = indexBuilder.build();
- cuVSResourceManager.finishedComputation(cuVSResources);
- if (logger.isDebugEnabled()) {
- logger.debug(
- "Carga index created in: {} ms; #num vectors: {}",
- (System.nanoTime() - startTime) / 1_000_000.0,
- dataset.size()
- );
- }
-
- // TODO: do serialization through MemorySegment instead of a temp file
- // serialize index for CPU consumption to the hnwslib format
- startTime = System.nanoTime();
- IndexOutput tempCagraHNSW = null;
- boolean success = false;
- try {
- tempCagraHNSW = segmentWriteState.directory.createTempOutput(
- vectorIndex.getName(),
- "cagra_hnws_temp",
- segmentWriteState.context
- );
- var tempCagraHNSWOutputStream = new IndexOutputOutputStream(tempCagraHNSW);
- index.serializeToHNSW(tempCagraHNSWOutputStream);
- if (logger.isDebugEnabled()) {
- logger.debug("Carga index serialized to hnswlib format in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
- }
- success = true;
- } finally {
- index.destroyIndex();
- if (success) {
- org.elasticsearch.core.IOUtils.close(tempCagraHNSW);
- } else {
- if (tempCagraHNSW != null) {
- IOUtils.closeWhileHandlingException(tempCagraHNSW);
- org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName());
- }
- }
- }
- return tempCagraHNSW.getName();
- } finally {
- cuVSResourceManager.release(cuVSResources);
+ long startTime = System.nanoTime();
+ var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params);
+ var index = indexBuilder.build();
+ cuVSResourceManager.finishedComputation(cuVSResources);
+ if (logger.isDebugEnabled()) {
+ logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, dataset.size());
}
+ return index;
}
- @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)")
- private HnswGraph writeGraph(String tempCagraHNSWFileName, int[][] levelNodeOffsets) throws IOException {
+ private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) throws IOException {
long startTime = System.nanoTime();
- boolean success = false;
- IndexInput tempCagraHNSWInput = null;
- int maxElementCount;
- int maxGraphDegree;
- try {
- tempCagraHNSWInput = segmentWriteState.directory.openInput(tempCagraHNSWFileName, segmentWriteState.context);
- // read the metadata from the hnlswlib format;
- // some of them are not used in the Lucene HNSW format
- tempCagraHNSWInput.readLong(); // offSetLevel0
- maxElementCount = (int) tempCagraHNSWInput.readLong();
- tempCagraHNSWInput.readLong(); // currElementCount
- tempCagraHNSWInput.readLong(); // sizeDataPerElement
- long labelOffset = tempCagraHNSWInput.readLong();
- long dataOffset = tempCagraHNSWInput.readLong();
- int maxLevel = tempCagraHNSWInput.readInt();
- tempCagraHNSWInput.readInt(); // entryPointNode
- tempCagraHNSWInput.readLong(); // maxM
- long maxM0 = tempCagraHNSWInput.readLong(); // number of graph connections
- tempCagraHNSWInput.readLong(); // M
- tempCagraHNSWInput.readLong(); // mult
- tempCagraHNSWInput.readLong(); // efConstruction
-
- assert (maxLevel == 1) : "Cagra index is flat, maxLevel must be: 1, got: " + maxLevel;
- maxGraphDegree = (int) maxM0;
- int[] neighbors = new int[maxGraphDegree];
- int dimension = (int) ((labelOffset - dataOffset) / Float.BYTES);
- // assert (dimension == dimensionCalculated)
- // : "Cagra index vector dimension must be: " + dimension + ", got: " + dimensionCalculated;
-
- levelNodeOffsets[0] = new int[maxElementCount];
-
- // read graph from the cagra_hnswlib index and write it to the Lucene vectorIndex file
- int[] scratch = new int[maxGraphDegree];
- for (int node = 0; node < maxElementCount; node++) {
- // read from the cagra_hnswlib index
- int nodeDegree = tempCagraHNSWInput.readInt();
- assert (nodeDegree == maxGraphDegree)
- : "In Cagra graph all nodes must have the same number of connections : " + maxGraphDegree + ", got" + nodeDegree;
- for (int i = 0; i < nodeDegree; i++) {
- neighbors[i] = tempCagraHNSWInput.readInt();
- }
- // Skip over the vector data
- tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + dimension * Float.BYTES);
- // Skip over the label/id
- tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + Long.BYTES);
-
- // write to the Lucene vectorIndex file
- long offsetStart = vectorIndex.getFilePointer();
- Arrays.sort(neighbors);
- int actualSize = 0;
- scratch[actualSize++] = neighbors[0];
- for (int i = 1; i < nodeDegree; i++) {
- assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount;
- if (neighbors[i - 1] == neighbors[i]) {
- continue;
- }
- scratch[actualSize++] = neighbors[i] - neighbors[i - 1];
- }
- // Write the size after duplicates are removed
- vectorIndex.writeVInt(actualSize);
- for (int i = 0; i < actualSize; i++) {
- vectorIndex.writeVInt(scratch[i]);
+ int maxElementCount = (int) cagraGraph.size();
+ int maxGraphDegree = (int) cagraGraph.columns();
+ int[] neighbors = new int[maxGraphDegree];
+
+ levelNodeOffsets[0] = new int[maxElementCount];
+ // write the cagra graph to the Lucene vectorIndex file
+ int[] scratch = new int[maxGraphDegree];
+ for (int node = 0; node < maxElementCount; node++) {
+ cagraGraph.getRow(node).toArray(neighbors);
+
+ // write to the Lucene vectorIndex file
+ long offsetStart = vectorIndex.getFilePointer();
+ Arrays.sort(neighbors);
+ int actualSize = 0;
+ scratch[actualSize++] = neighbors[0];
+ for (int i = 1; i < maxGraphDegree; i++) {
+ assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount;
+ if (neighbors[i - 1] == neighbors[i]) {
+ continue;
}
- levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart);
- }
- if (logger.isDebugEnabled()) {
- logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
+ scratch[actualSize++] = neighbors[i] - neighbors[i - 1];
}
- success = true;
- } finally {
- if (success) {
- IOUtils.close(tempCagraHNSWInput);
- } else {
- IOUtils.closeWhileHandlingException(tempCagraHNSWInput);
+ // Write the size after duplicates are removed
+ vectorIndex.writeVInt(actualSize);
+ for (int i = 0; i < actualSize; i++) {
+ vectorIndex.writeVInt(scratch[i]);
}
- org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSWFileName);
+ levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart);
+ }
+ if (logger.isDebugEnabled()) {
+ logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
}
return createMockGraph(maxElementCount, maxGraphDegree);
}
diff --git a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java
index 998154e1ac303..01bcde9dcc0e0 100644
--- a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java
+++ b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java
@@ -7,7 +7,7 @@
package org.elasticsearch.xpack.gpu.codec;
-import com.nvidia.cuvs.Dataset;
+import com.nvidia.cuvs.CuVSMatrix;
import com.nvidia.cuvs.spi.CuVSProvider;
import org.apache.lucene.store.MemorySegmentAccessInput;
@@ -20,15 +20,15 @@ public class DatasetUtilsImpl implements DatasetUtils {
private static final DatasetUtils INSTANCE = new DatasetUtilsImpl();
- private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeDatasetBuilder();
+ private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeMatrixBuilder();
static DatasetUtils getInstance() {
return INSTANCE;
}
- static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dimensions) {
+ static CuVSMatrix fromMemorySegment(MemorySegment memorySegment, int size, int dimensions, CuVSMatrix.DataType dataType) {
try {
- return (Dataset) createDataset$mh.invokeExact(memorySegment, size, dimensions);
+ return (CuVSMatrix) createDataset$mh.invokeExact(memorySegment, size, dimensions, dataType);
} catch (Throwable e) {
if (e instanceof Error err) {
throw err;
@@ -43,7 +43,7 @@ static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dime
private DatasetUtilsImpl() {}
@Override
- public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
+ public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
if (numVectors < 0 || dims < 0) {
throwIllegalArgumentException(numVectors, dims);
}
@@ -52,7 +52,7 @@ public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dim
if (((long) numVectors * dims * Float.BYTES) > ms.byteSize()) {
throwIllegalArgumentException(ms, numVectors, dims);
}
- return fromMemorySegment(ms, numVectors, dims);
+ return fromMemorySegment(ms, numVectors, dims, CuVSMatrix.DataType.FLOAT);
}
static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) {
diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java
index bfc4ee6d48d0d..0c9c63257c0e8 100644
--- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java
+++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java
@@ -53,7 +53,7 @@ public void testBasic() throws Exception {
var dataset = datasetUtils.fromInput((MemorySegmentAccessInput) in, numVecs, dims)
) {
assertEquals(numVecs, dataset.size());
- assertEquals(dims, dataset.dimensions());
+ assertEquals(dims, dataset.columns());
}
}
}
diff --git a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java
index 0f4a7a059b6d4..4bfaab9243d90 100644
--- a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java
+++ b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java
@@ -21,6 +21,7 @@ public class GPUClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
.module("gpu")
.setting("xpack.license.self_generated.type", "trial")
.setting("xpack.security.enabled", "false")
+ .environment("LD_LIBRARY_PATH", System.getenv("LD_LIBRARY_PATH"))
.build();
public GPUClientYamlTestSuiteIT(final ClientYamlTestCandidate testCandidate) {