7 changes: 6 additions & 1 deletion gradle/verification-metadata.xml
@@ -1130,7 +1130,12 @@
</component>
<component group="com.nvidia.cuvs" name="cuvs-java" version="25.08.0">
<artifact name="cuvs-java-25.08.0.jar">
<sha256 value="edec77b7b3cc20d7cc32f97a66f50bf805ed3c88eac12f1a6b43cdabb062a007" origin="Generated by Gradle"/>
<sha256 value="d7ae03068bb58e71c3f6b7cb05fde1665bb0121a9daf0b6ce86502bc1a5829d5" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.nvidia.cuvs" name="cuvs-java" version="25.10.0">
<artifact name="cuvs-java-25.10.0.jar">
<sha256 value="88e9911ad3006061729387c3aa194dede0695003428b087f7c987fa2c84c7e3d" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.perforce" name="p4java" version="2015.2.1365273">
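Gradle's dependency verification fails the build for any artifact that lacks a pinned checksum, so the 25.10.0 jar needs its own `<component>` entry (note the existing 25.08.0 entry also picks up a new checksum in this change). A common way to regenerate these entries, assuming the stock Gradle verification workflow rather than any Elasticsearch-specific wrapper:

```sh
# Resolves dependencies and appends missing sha256 entries to
# gradle/verification-metadata.xml; review the resulting diff before committing.
./gradlew --write-verification-metadata sha256 help
```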
2 changes: 1 addition & 1 deletion x-pack/plugin/gpu/build.gradle
@@ -20,7 +20,7 @@ repositories {
dependencies {
compileOnly project(path: xpackModule('core'))
compileOnly project(':server')
implementation 'com.nvidia.cuvs:cuvs-java:25.08.0'
implementation 'com.nvidia.cuvs:cuvs-java:25.10.0'
testImplementation(testArtifact(project(xpackModule('core'))))
testImplementation(testArtifact(project(':server')))
clusterModules project(xpackModule('gpu'))
@@ -7,7 +7,7 @@

package org.elasticsearch.xpack.gpu.codec;

import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.CuVSMatrix;

import org.apache.lucene.store.MemorySegmentAccessInput;

@@ -20,6 +20,5 @@ static DatasetUtils getInstance() {
}

/** Returns a CuVSMatrix over the float32 vectors in the input. */
Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException;

CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException;
}
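The interface swap above tracks cuVS 25.10.0, which folds the old `Dataset` type into the more general `CuVSMatrix`. A minimal sketch of how a caller is expected to use it; the surrounding class and names are assumptions for illustration, not code from this PR:

```java
import java.io.IOException;

import com.nvidia.cuvs.CuVSMatrix;
import org.apache.lucene.store.MemorySegmentAccessInput;

class DatasetUtilsExample {
    // Wraps numVectors x dims float32 vectors, laid out row-major in the
    // input, as a CuVSMatrix view for a GPU-side index build.
    static CuVSMatrix wrap(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
        CuVSMatrix matrix = DatasetUtils.getInstance().fromInput(input, numVectors, dims);
        assert matrix.size() == numVectors && matrix.columns() == dims;
        return matrix;
    }
}
```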
@@ -7,12 +7,10 @@

package org.elasticsearch.xpack.gpu.codec;

import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.CuVSMatrix;

import org.apache.lucene.store.MemorySegmentAccessInput;

import java.io.IOException;

/** Stub holder - never executed. */
public class DatasetUtilsImpl implements DatasetUtils {

@@ -21,7 +19,7 @@ static DatasetUtils getInstance() {
}

@Override
public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) {
throw new UnsupportedOperationException("should not reach here");
}
}
@@ -9,7 +9,7 @@

import com.nvidia.cuvs.CagraIndex;
import com.nvidia.cuvs.CagraIndexParams;
import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.CuVSMatrix;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.KnnFieldVectorsWriter;
@@ -35,7 +35,6 @@
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.elasticsearch.common.lucene.store.IndexOutputOutputStream;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.logging.LogManager;
@@ -177,21 +176,21 @@ public long ramBytesUsed() {
}

private static final class DatasetOrVectors {
private final Dataset dataset;
private final CuVSMatrix dataset;
private final float[][] vectors;

static DatasetOrVectors fromArray(float[][] vectors) {
return new DatasetOrVectors(
vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : Dataset.ofArray(vectors),
vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : CuVSMatrix.ofArray(vectors),
vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? vectors : null
);
}

static DatasetOrVectors fromDataset(Dataset dataset) {
static DatasetOrVectors fromDataset(CuVSMatrix dataset) {
return new DatasetOrVectors(dataset, null);
}

private DatasetOrVectors(Dataset dataset, float[][] vectors) {
private DatasetOrVectors(CuVSMatrix dataset, float[][] vectors) {
this.dataset = dataset;
this.vectors = vectors;
validateState();
@@ -204,10 +203,10 @@ private void validateState() {
}

int size() {
return dataset != null ? dataset.size() : vectors.length;
return dataset != null ? (int) dataset.size() : vectors.length;
}

Dataset getDataset() {
CuVSMatrix getDataset() {
return dataset;
}
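`DatasetOrVectors` keeps the CPU/GPU fork in one place: flush-time vectors arrive as `float[][]`, and `fromArray` only pays for a native `CuVSMatrix.ofArray` copy once the count reaches `MIN_NUM_VECTORS_FOR_GPU_BUILD`, while the other entry point (presumably the merge path, where vectors already live off-heap) hands in a ready-made matrix. A hedged sketch of the two entry points:

```java
// Hedged sketch; method names and parameters here are illustrative.
static DatasetOrVectors fromFlush(float[][] vectors) {
    // fromArray decides internally whether the vector count justifies a
    // native CuVSMatrix copy for the GPU build path.
    return DatasetOrVectors.fromArray(vectors);
}

static DatasetOrVectors fromSegment(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
    // Vectors already accessible as a MemorySegment: wrap them without
    // copying and hand the ready-made matrix to the holder.
    return DatasetOrVectors.fromDataset(DatasetUtils.getInstance().fromInput(input, numVectors, dims));
}
```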

@@ -243,9 +242,21 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV
}
mockGraph = writeGraph(vectors, graphLevelNodeOffsets);
} else {
String tempCagraHNSWFileName = buildGPUIndex(fieldInfo.getVectorSimilarityFunction(), datasetOrVectors.dataset);
assert tempCagraHNSWFileName != null : "GPU index should be built for field: " + fieldInfo.name;
mockGraph = writeGraph(tempCagraHNSWFileName, graphLevelNodeOffsets);
var dataset = datasetOrVectors.dataset;
var cuVSResources = cuVSResourceManager.acquire((int) dataset.size(), (int) dataset.columns());
try {
var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset);
try {
assert index != null : "GPU index should be built for field: " + fieldInfo.name;
mockGraph = writeGraph(index.getGraph(), graphLevelNodeOffsets);
} finally {
if (index != null) {
index.destroyIndex();
}
}
} finally {
cuVSResourceManager.release(cuVSResources);
}
}
long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset;
writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetOrVectors.size(), mockGraph, graphLevelNodeOffsets);
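The rewritten branch above inlines the whole GPU lifecycle that previously hid behind a temp file: acquire pooled `CuVSResources` sized to the dataset, build the index, write its graph, then tear down in nested `try`/`finally` so both the native index and the pooled resources are released even when writing fails. Reduced to its shape (the manager's semantics are inferred from context):

```java
// Nested ownership: resources outlive the index, and both are released
// on every path. destroyIndex() frees the native CAGRA index.
var cuVSResources = cuVSResourceManager.acquire(rows, cols);
try {
    CagraIndex index = buildGPUIndex(cuVSResources, similarityFunction, dataset);
    try {
        mockGraph = writeGraph(index.getGraph(), graphLevelNodeOffsets);
    } finally {
        index.destroyIndex();
    }
} finally {
    cuVSResourceManager.release(cuVSResources);
}
```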
@@ -256,8 +267,11 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV
}
}

@SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)")
private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Dataset dataset) throws Throwable {
private CagraIndex buildGPUIndex(
CuVSResourceManager.ManagedCuVSResources cuVSResources,
VectorSimilarityFunction similarityFunction,
CuVSMatrix dataset
) throws Throwable {
CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) {
case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded;
case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct;
@@ -271,134 +285,49 @@ private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Datase
.withMetric(distanceType)
.build();

var cuVSResources = cuVSResourceManager.acquire(dataset.size(), dataset.dimensions());
try {
long startTime = System.nanoTime();
var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params);
var index = indexBuilder.build();
cuVSResourceManager.finishedComputation(cuVSResources);
if (logger.isDebugEnabled()) {
logger.debug(
"Carga index created in: {} ms; #num vectors: {}",
(System.nanoTime() - startTime) / 1_000_000.0,
dataset.size()
);
}

// TODO: do serialization through MemorySegment instead of a temp file
// serialize index for CPU consumption to the hnswlib format
startTime = System.nanoTime();
IndexOutput tempCagraHNSW = null;
boolean success = false;
try {
tempCagraHNSW = segmentWriteState.directory.createTempOutput(
vectorIndex.getName(),
"cagra_hnws_temp",
segmentWriteState.context
);
var tempCagraHNSWOutputStream = new IndexOutputOutputStream(tempCagraHNSW);
index.serializeToHNSW(tempCagraHNSWOutputStream);
if (logger.isDebugEnabled()) {
logger.debug("Carga index serialized to hnswlib format in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
}
success = true;
} finally {
index.destroyIndex();
if (success) {
org.elasticsearch.core.IOUtils.close(tempCagraHNSW);
} else {
if (tempCagraHNSW != null) {
IOUtils.closeWhileHandlingException(tempCagraHNSW);
org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName());
}
}
}
return tempCagraHNSW.getName();
} finally {
cuVSResourceManager.release(cuVSResources);
long startTime = System.nanoTime();
var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params);
var index = indexBuilder.build();
cuVSResourceManager.finishedComputation(cuVSResources);
if (logger.isDebugEnabled()) {
logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, dataset.size());
}
return index;
}
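`buildGPUIndex` now returns the live `CagraIndex` instead of a temp-file name: the hnswlib serialization round trip is gone because cuVS 25.10.0 exposes the CAGRA graph directly as a matrix, and `finishedComputation(...)` tells the resource manager the GPU-heavy phase is over before the caller starts writing. A hedged sketch of the build/extract handshake, using only calls visible in this diff (builder construction details are assumptions):

```java
var params = new CagraIndexParams.Builder()
        .withMetric(CagraIndexParams.CuvsDistanceType.L2Expanded)
        .build();
CagraIndex index = CagraIndex.newBuilder(cuVSResources)
        .withDataset(dataset)      // float32 CuVSMatrix, one vector per row
        .withIndexParams(params)
        .build();
// Fixed-degree kNN graph: size() rows, columns() neighbor ids per row.
CuVSMatrix cagraGraph = index.getGraph();
```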

@SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)")
private HnswGraph writeGraph(String tempCagraHNSWFileName, int[][] levelNodeOffsets) throws IOException {
private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) throws IOException {
long startTime = System.nanoTime();
boolean success = false;
IndexInput tempCagraHNSWInput = null;
int maxElementCount;
int maxGraphDegree;

try {
tempCagraHNSWInput = segmentWriteState.directory.openInput(tempCagraHNSWFileName, segmentWriteState.context);
// read the metadata from the hnswlib format;
// some of them are not used in the Lucene HNSW format
tempCagraHNSWInput.readLong(); // offSetLevel0
maxElementCount = (int) tempCagraHNSWInput.readLong();
tempCagraHNSWInput.readLong(); // currElementCount
tempCagraHNSWInput.readLong(); // sizeDataPerElement
long labelOffset = tempCagraHNSWInput.readLong();
long dataOffset = tempCagraHNSWInput.readLong();
int maxLevel = tempCagraHNSWInput.readInt();
tempCagraHNSWInput.readInt(); // entryPointNode
tempCagraHNSWInput.readLong(); // maxM
long maxM0 = tempCagraHNSWInput.readLong(); // number of graph connections
tempCagraHNSWInput.readLong(); // M
tempCagraHNSWInput.readLong(); // mult
tempCagraHNSWInput.readLong(); // efConstruction

assert (maxLevel == 1) : "Cagra index is flat, maxLevel must be: 1, got: " + maxLevel;
maxGraphDegree = (int) maxM0;
int[] neighbors = new int[maxGraphDegree];
int dimension = (int) ((labelOffset - dataOffset) / Float.BYTES);
// assert (dimension == dimensionCalculated)
// : "Cagra index vector dimension must be: " + dimension + ", got: " + dimensionCalculated;

levelNodeOffsets[0] = new int[maxElementCount];

// read graph from the cagra_hnswlib index and write it to the Lucene vectorIndex file
int[] scratch = new int[maxGraphDegree];
for (int node = 0; node < maxElementCount; node++) {
// read from the cagra_hnswlib index
int nodeDegree = tempCagraHNSWInput.readInt();
assert (nodeDegree == maxGraphDegree)
: "In Cagra graph all nodes must have the same number of connections : " + maxGraphDegree + ", got" + nodeDegree;
for (int i = 0; i < nodeDegree; i++) {
neighbors[i] = tempCagraHNSWInput.readInt();
}
// Skip over the vector data
tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + dimension * Float.BYTES);
// Skip over the label/id
tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + Long.BYTES);

// write to the Lucene vectorIndex file
long offsetStart = vectorIndex.getFilePointer();
Arrays.sort(neighbors);
int actualSize = 0;
scratch[actualSize++] = neighbors[0];
for (int i = 1; i < nodeDegree; i++) {
assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount;
if (neighbors[i - 1] == neighbors[i]) {
continue;
}
scratch[actualSize++] = neighbors[i] - neighbors[i - 1];
}
// Write the size after duplicates are removed
vectorIndex.writeVInt(actualSize);
for (int i = 0; i < actualSize; i++) {
vectorIndex.writeVInt(scratch[i]);
int maxElementCount = (int) cagraGraph.size();
int maxGraphDegree = (int) cagraGraph.columns();
int[] neighbors = new int[maxGraphDegree];

// write the cagra graph to the Lucene vectorIndex file
int[] scratch = new int[maxGraphDegree];
for (int node = 0; node < maxElementCount; node++) {
cagraGraph.getRow(node).toArray(neighbors);

// write to the Lucene vectorIndex file
long offsetStart = vectorIndex.getFilePointer();
Arrays.sort(neighbors);
int actualSize = 0;
scratch[actualSize++] = neighbors[0];
for (int i = 1; i < maxGraphDegree; i++) {
assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount;
if (neighbors[i - 1] == neighbors[i]) {
continue;
}
levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart);
}
if (logger.isDebugEnabled()) {
logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
scratch[actualSize++] = neighbors[i] - neighbors[i - 1];
}
success = true;
} finally {
if (success) {
IOUtils.close(tempCagraHNSWInput);
} else {
IOUtils.closeWhileHandlingException(tempCagraHNSWInput);
// Write the size after duplicates are removed
vectorIndex.writeVInt(actualSize);
for (int i = 0; i < actualSize; i++) {
vectorIndex.writeVInt(scratch[i]);
}
org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSWFileName);
levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart);
}
if (logger.isDebugEnabled()) {
logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
}
return createMockGraph(maxElementCount, maxGraphDegree);
}
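The write loop above is Lucene's standard neighbor-list encoding: sort the ids, drop CAGRA's occasional duplicate edges, store the first id absolute and every subsequent one as a gap, then emit everything as vInts so small values take a single byte. A self-contained sketch of just the encoding step:

```java
import java.util.Arrays;

final class NeighborListEncoding {
    // Returns the number of values in `out` to emit as vInts, after the
    // vInt count itself. Decoding is the mirror image: a running sum.
    static int deltaEncode(int[] neighbors, int[] out) {
        Arrays.sort(neighbors);
        int n = 0;
        out[n++] = neighbors[0];                         // first id, absolute
        for (int i = 1; i < neighbors.length; i++) {
            if (neighbors[i] == neighbors[i - 1]) {
                continue;                                // duplicate edge
            }
            out[n++] = neighbors[i] - neighbors[i - 1];  // gap to previous id
        }
        return n;
    }
}
```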
@@ -7,7 +7,7 @@

package org.elasticsearch.xpack.gpu.codec;

import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.CuVSMatrix;
import com.nvidia.cuvs.spi.CuVSProvider;

import org.apache.lucene.store.MemorySegmentAccessInput;
Expand All @@ -20,15 +20,15 @@ public class DatasetUtilsImpl implements DatasetUtils {

private static final DatasetUtils INSTANCE = new DatasetUtilsImpl();

private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeDatasetBuilder();
private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeMatrixBuilder();

static DatasetUtils getInstance() {
return INSTANCE;
}

static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dimensions) {
static CuVSMatrix fromMemorySegment(MemorySegment memorySegment, int size, int dimensions, CuVSMatrix.DataType dataType) {
try {
return (Dataset) createDataset$mh.invokeExact(memorySegment, size, dimensions);
return (CuVSMatrix) createDataset$mh.invokeExact(memorySegment, size, dimensions, dataType);
} catch (Throwable e) {
if (e instanceof Error err) {
throw err;
@@ -43,7 +43,7 @@ static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dime
private DatasetUtilsImpl() {}

@Override
public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
if (numVectors < 0 || dims < 0) {
throwIllegalArgumentException(numVectors, dims);
}
@@ -52,7 +52,7 @@ public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dim
if (((long) numVectors * dims * Float.BYTES) > ms.byteSize()) {
throwIllegalArgumentException(ms, numVectors, dims);
}
return fromMemorySegment(ms, numVectors, dims);
return fromMemorySegment(ms, numVectors, dims, CuVSMatrix.DataType.FLOAT);
}

static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) {
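The `createDataset$mh` handle comes from the cuVS SPI, and `invokeExact` demands that the call site's static types match the handle's type exactly, which is why the code casts the result to `CuVSMatrix` and passes precisely typed arguments. A hedged sketch of the calling convention, assuming the handle's shape is `(MemorySegment, int, int, CuVSMatrix.DataType) -> CuVSMatrix` as the diff implies:

```java
import java.lang.foreign.MemorySegment;
import java.lang.invoke.MethodHandle;

import com.nvidia.cuvs.CuVSMatrix;
import com.nvidia.cuvs.spi.CuVSProvider;

final class NativeMatrixExample {
    static CuVSMatrix build(MemorySegment segment, int rows, int cols) {
        MethodHandle mh = CuVSProvider.provider().newNativeMatrixBuilder();
        try {
            // invokeExact: no boxing, no varargs, exact signature match.
            return (CuVSMatrix) mh.invokeExact(segment, rows, cols, CuVSMatrix.DataType.FLOAT);
        } catch (Throwable t) {
            if (t instanceof Error err) throw err;
            if (t instanceof RuntimeException re) throw re;
            throw new RuntimeException(t);
        }
    }
}
```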