diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 689560bb17d38..e2c2f016aa34f 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -15,6 +15,10 @@ + + + + @@ -1130,7 +1134,7 @@ - + diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle index 1ab800cfac4e0..0c4ce21d0fad0 100644 --- a/x-pack/plugin/gpu/build.gradle +++ b/x-pack/plugin/gpu/build.gradle @@ -20,14 +20,12 @@ repositories { dependencies { compileOnly project(path: xpackModule('core')) compileOnly project(':server') - implementation 'com.nvidia.cuvs:cuvs-java:25.08.0' + implementation 'com.nvidia.cuvs:cuvs-java:25.10.0' testImplementation(testArtifact(project(xpackModule('core')))) testImplementation(testArtifact(project(':server'))) clusterModules project(xpackModule('gpu')) } -tasks.named("yamlRestTest") { - usesDefaultDistribution("uses gpu plugin") -} + artifacts { restXpackTests(new File(projectDir, "src/yamlRestTest/resources/rest-api-spec/test")) } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java index 67fd97faec259..f1ff6bcffd1d2 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java @@ -46,7 +46,7 @@ public static CuVSResources cuVSResourcesOrNull(boolean logError) { if (t instanceof ExceptionInInitializerError ex) { t = ex.getCause(); } - LOG.warn("Exception occurred during creation of cuvs resources. " + t); + LOG.warn("Exception occurred during creation of cuvs resources", t); } } return null; diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java index 0f90ab4c5bb75..bdc598e876931 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java @@ -7,7 +7,7 @@ package org.elasticsearch.xpack.gpu.codec; -import com.nvidia.cuvs.Dataset; +import com.nvidia.cuvs.CuVSMatrix; import org.apache.lucene.store.MemorySegmentAccessInput; @@ -20,6 +20,5 @@ static DatasetUtils getInstance() { } /** Returns a Dataset over the float32 vectors in the input. */ - Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException; - + CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException; } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java index 3d6d33028ab8f..0cca7ce5cd7cb 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java @@ -7,12 +7,10 @@ package org.elasticsearch.xpack.gpu.codec; -import com.nvidia.cuvs.Dataset; +import com.nvidia.cuvs.CuVSMatrix; import org.apache.lucene.store.MemorySegmentAccessInput; -import java.io.IOException; - /** Stubb holder - never executed. */ public class DatasetUtilsImpl implements DatasetUtils { @@ -21,7 +19,7 @@ static DatasetUtils getInstance() { } @Override - public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException { + public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) { throw new UnsupportedOperationException("should not reach here"); } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java index 163e2277137ed..73e11dd24f4d5 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -9,7 +9,7 @@ import com.nvidia.cuvs.CagraIndex; import com.nvidia.cuvs.CagraIndexParams; -import com.nvidia.cuvs.Dataset; +import com.nvidia.cuvs.CuVSMatrix; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnFieldVectorsWriter; @@ -35,7 +35,6 @@ import org.apache.lucene.util.hnsw.HnswGraph; import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator; import org.apache.lucene.util.packed.DirectMonotonicWriter; -import org.elasticsearch.common.lucene.store.IndexOutputOutputStream; import org.elasticsearch.core.IOUtils; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.logging.LogManager; @@ -177,21 +176,21 @@ public long ramBytesUsed() { } private static final class DatasetOrVectors { - private final Dataset dataset; + private final CuVSMatrix dataset; private final float[][] vectors; static DatasetOrVectors fromArray(float[][] vectors) { return new DatasetOrVectors( - vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : Dataset.ofArray(vectors), + vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : CuVSMatrix.ofArray(vectors), vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? vectors : null ); } - static DatasetOrVectors fromDataset(Dataset dataset) { + static DatasetOrVectors fromDataset(CuVSMatrix dataset) { return new DatasetOrVectors(dataset, null); } - private DatasetOrVectors(Dataset dataset, float[][] vectors) { + private DatasetOrVectors(CuVSMatrix dataset, float[][] vectors) { this.dataset = dataset; this.vectors = vectors; validateState(); @@ -204,10 +203,10 @@ private void validateState() { } int size() { - return dataset != null ? dataset.size() : vectors.length; + return dataset != null ? (int) dataset.size() : vectors.length; } - Dataset getDataset() { + CuVSMatrix getDataset() { return dataset; } @@ -243,9 +242,16 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV } mockGraph = writeGraph(vectors, graphLevelNodeOffsets); } else { - String tempCagraHNSWFileName = buildGPUIndex(fieldInfo.getVectorSimilarityFunction(), datasetOrVectors.dataset); - assert tempCagraHNSWFileName != null : "GPU index should be built for field: " + fieldInfo.name; - mockGraph = writeGraph(tempCagraHNSWFileName, graphLevelNodeOffsets); + var dataset = datasetOrVectors.dataset; + var cuVSResources = cuVSResourceManager.acquire((int) dataset.size(), (int) dataset.columns()); + try { + try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) { + assert index != null : "GPU index should be built for field: " + fieldInfo.name; + mockGraph = writeGraph(index.getGraph(), graphLevelNodeOffsets); + } + } finally { + cuVSResourceManager.release(cuVSResources); + } } long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetOrVectors.size(), mockGraph, graphLevelNodeOffsets); @@ -256,8 +262,11 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV } } - @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") - private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Dataset dataset) throws Throwable { + private CagraIndex buildGPUIndex( + CuVSResourceManager.ManagedCuVSResources cuVSResources, + VectorSimilarityFunction similarityFunction, + CuVSMatrix dataset + ) throws Throwable { CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) { case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded; case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct; @@ -271,134 +280,50 @@ private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Datase .withMetric(distanceType) .build(); - var cuVSResources = cuVSResourceManager.acquire(dataset.size(), dataset.dimensions()); - try { - long startTime = System.nanoTime(); - var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params); - var index = indexBuilder.build(); - cuVSResourceManager.finishedComputation(cuVSResources); - if (logger.isDebugEnabled()) { - logger.debug( - "Carga index created in: {} ms; #num vectors: {}", - (System.nanoTime() - startTime) / 1_000_000.0, - dataset.size() - ); - } - - // TODO: do serialization through MemorySegment instead of a temp file - // serialize index for CPU consumption to the hnwslib format - startTime = System.nanoTime(); - IndexOutput tempCagraHNSW = null; - boolean success = false; - try { - tempCagraHNSW = segmentWriteState.directory.createTempOutput( - vectorIndex.getName(), - "cagra_hnws_temp", - segmentWriteState.context - ); - var tempCagraHNSWOutputStream = new IndexOutputOutputStream(tempCagraHNSW); - index.serializeToHNSW(tempCagraHNSWOutputStream); - if (logger.isDebugEnabled()) { - logger.debug("Carga index serialized to hnswlib format in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); - } - success = true; - } finally { - index.destroyIndex(); - if (success) { - org.elasticsearch.core.IOUtils.close(tempCagraHNSW); - } else { - if (tempCagraHNSW != null) { - IOUtils.closeWhileHandlingException(tempCagraHNSW); - org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName()); - } - } - } - return tempCagraHNSW.getName(); - } finally { - cuVSResourceManager.release(cuVSResources); + long startTime = System.nanoTime(); + var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params); + var index = indexBuilder.build(); + cuVSResourceManager.finishedComputation(cuVSResources); + if (logger.isDebugEnabled()) { + logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, dataset.size()); } + return index; } - @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") - private HnswGraph writeGraph(String tempCagraHNSWFileName, int[][] levelNodeOffsets) throws IOException { + private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) throws IOException { long startTime = System.nanoTime(); - boolean success = false; - IndexInput tempCagraHNSWInput = null; - int maxElementCount; - int maxGraphDegree; - try { - tempCagraHNSWInput = segmentWriteState.directory.openInput(tempCagraHNSWFileName, segmentWriteState.context); - // read the metadata from the hnlswlib format; - // some of them are not used in the Lucene HNSW format - tempCagraHNSWInput.readLong(); // offSetLevel0 - maxElementCount = (int) tempCagraHNSWInput.readLong(); - tempCagraHNSWInput.readLong(); // currElementCount - tempCagraHNSWInput.readLong(); // sizeDataPerElement - long labelOffset = tempCagraHNSWInput.readLong(); - long dataOffset = tempCagraHNSWInput.readLong(); - int maxLevel = tempCagraHNSWInput.readInt(); - tempCagraHNSWInput.readInt(); // entryPointNode - tempCagraHNSWInput.readLong(); // maxM - long maxM0 = tempCagraHNSWInput.readLong(); // number of graph connections - tempCagraHNSWInput.readLong(); // M - tempCagraHNSWInput.readLong(); // mult - tempCagraHNSWInput.readLong(); // efConstruction - - assert (maxLevel == 1) : "Cagra index is flat, maxLevel must be: 1, got: " + maxLevel; - maxGraphDegree = (int) maxM0; - int[] neighbors = new int[maxGraphDegree]; - int dimension = (int) ((labelOffset - dataOffset) / Float.BYTES); - // assert (dimension == dimensionCalculated) - // : "Cagra index vector dimension must be: " + dimension + ", got: " + dimensionCalculated; - - levelNodeOffsets[0] = new int[maxElementCount]; - - // read graph from the cagra_hnswlib index and write it to the Lucene vectorIndex file - int[] scratch = new int[maxGraphDegree]; - for (int node = 0; node < maxElementCount; node++) { - // read from the cagra_hnswlib index - int nodeDegree = tempCagraHNSWInput.readInt(); - assert (nodeDegree == maxGraphDegree) - : "In Cagra graph all nodes must have the same number of connections : " + maxGraphDegree + ", got" + nodeDegree; - for (int i = 0; i < nodeDegree; i++) { - neighbors[i] = tempCagraHNSWInput.readInt(); - } - // Skip over the vector data - tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + dimension * Float.BYTES); - // Skip over the label/id - tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + Long.BYTES); - - // write to the Lucene vectorIndex file - long offsetStart = vectorIndex.getFilePointer(); - Arrays.sort(neighbors); - int actualSize = 0; - scratch[actualSize++] = neighbors[0]; - for (int i = 1; i < nodeDegree; i++) { - assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount; - if (neighbors[i - 1] == neighbors[i]) { - continue; - } - scratch[actualSize++] = neighbors[i] - neighbors[i - 1]; - } - // Write the size after duplicates are removed - vectorIndex.writeVInt(actualSize); - for (int i = 0; i < actualSize; i++) { - vectorIndex.writeVInt(scratch[i]); + int maxElementCount = (int) cagraGraph.size(); + int maxGraphDegree = (int) cagraGraph.columns(); + int[] neighbors = new int[maxGraphDegree]; + + levelNodeOffsets[0] = new int[maxElementCount]; + // write the cagra graph to the Lucene vectorIndex file + int[] scratch = new int[maxGraphDegree]; + for (int node = 0; node < maxElementCount; node++) { + cagraGraph.getRow(node).toArray(neighbors); + + // write to the Lucene vectorIndex file + long offsetStart = vectorIndex.getFilePointer(); + Arrays.sort(neighbors); + int actualSize = 0; + scratch[actualSize++] = neighbors[0]; + for (int i = 1; i < maxGraphDegree; i++) { + assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount; + if (neighbors[i - 1] == neighbors[i]) { + continue; } - levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); - } - if (logger.isDebugEnabled()) { - logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); + scratch[actualSize++] = neighbors[i] - neighbors[i - 1]; } - success = true; - } finally { - if (success) { - IOUtils.close(tempCagraHNSWInput); - } else { - IOUtils.closeWhileHandlingException(tempCagraHNSWInput); + // Write the size after duplicates are removed + vectorIndex.writeVInt(actualSize); + for (int i = 0; i < actualSize; i++) { + vectorIndex.writeVInt(scratch[i]); } - org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSWFileName); + levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); + } + if (logger.isDebugEnabled()) { + logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); } return createMockGraph(maxElementCount, maxGraphDegree); } diff --git a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java index 998154e1ac303..01bcde9dcc0e0 100644 --- a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java +++ b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java @@ -7,7 +7,7 @@ package org.elasticsearch.xpack.gpu.codec; -import com.nvidia.cuvs.Dataset; +import com.nvidia.cuvs.CuVSMatrix; import com.nvidia.cuvs.spi.CuVSProvider; import org.apache.lucene.store.MemorySegmentAccessInput; @@ -20,15 +20,15 @@ public class DatasetUtilsImpl implements DatasetUtils { private static final DatasetUtils INSTANCE = new DatasetUtilsImpl(); - private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeDatasetBuilder(); + private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeMatrixBuilder(); static DatasetUtils getInstance() { return INSTANCE; } - static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dimensions) { + static CuVSMatrix fromMemorySegment(MemorySegment memorySegment, int size, int dimensions, CuVSMatrix.DataType dataType) { try { - return (Dataset) createDataset$mh.invokeExact(memorySegment, size, dimensions); + return (CuVSMatrix) createDataset$mh.invokeExact(memorySegment, size, dimensions, dataType); } catch (Throwable e) { if (e instanceof Error err) { throw err; @@ -43,7 +43,7 @@ static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dime private DatasetUtilsImpl() {} @Override - public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException { + public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException { if (numVectors < 0 || dims < 0) { throwIllegalArgumentException(numVectors, dims); } @@ -52,7 +52,7 @@ public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dim if (((long) numVectors * dims * Float.BYTES) > ms.byteSize()) { throwIllegalArgumentException(ms, numVectors, dims); } - return fromMemorySegment(ms, numVectors, dims); + return fromMemorySegment(ms, numVectors, dims, CuVSMatrix.DataType.FLOAT); } static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) { diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java index bfc4ee6d48d0d..0c9c63257c0e8 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java @@ -53,7 +53,7 @@ public void testBasic() throws Exception { var dataset = datasetUtils.fromInput((MemorySegmentAccessInput) in, numVecs, dims) ) { assertEquals(numVecs, dataset.size()); - assertEquals(dims, dataset.dimensions()); + assertEquals(dims, dataset.columns()); } } } diff --git a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java index 0f4a7a059b6d4..4bfaab9243d90 100644 --- a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java +++ b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java @@ -21,6 +21,7 @@ public class GPUClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { .module("gpu") .setting("xpack.license.self_generated.type", "trial") .setting("xpack.security.enabled", "false") + .environment("LD_LIBRARY_PATH", System.getenv("LD_LIBRARY_PATH")) .build(); public GPUClientYamlTestSuiteIT(final ClientYamlTestCandidate testCandidate) {