6 changes: 5 additions & 1 deletion gradle/verification-metadata.xml
@@ -15,6 +15,10 @@
<trust group="org.elasticsearch.plugin"/>
<trust file=".*-javadoc[.]jar" regex="true"/>
<trust file=".*-sources[.]jar" regex="true"/>

<!-- This is here because we currently use nightly builds, and the checksum keeps changing -->
<!-- TODO: move to a proper entry when the official cuvs-java is released -->
<trust group="com.nvidia.cuvs" name="cuvs-java" version="25.10.0" />
</trusted-artifacts>
</configuration>
<components>
@@ -1130,7 +1134,7 @@
</component>
<component group="com.nvidia.cuvs" name="cuvs-java" version="25.08.0">
<artifact name="cuvs-java-25.08.0.jar">
<sha256 value="edec77b7b3cc20d7cc32f97a66f50bf805ed3c88eac12f1a6b43cdabb062a007" origin="Generated by Gradle"/>
<sha256 value="d7ae03068bb58e71c3f6b7cb05fde1665bb0121a9daf0b6ce86502bc1a5829d5" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.perforce" name="p4java" version="2015.2.1365273">
6 changes: 2 additions & 4 deletions x-pack/plugin/gpu/build.gradle
@@ -20,14 +20,12 @@ repositories {
dependencies {
compileOnly project(path: xpackModule('core'))
compileOnly project(':server')
implementation 'com.nvidia.cuvs:cuvs-java:25.08.0'
implementation 'com.nvidia.cuvs:cuvs-java:25.10.0'
testImplementation(testArtifact(project(xpackModule('core'))))
testImplementation(testArtifact(project(':server')))
clusterModules project(xpackModule('gpu'))
}
tasks.named("yamlRestTest") {
usesDefaultDistribution("uses gpu plugin")
}

artifacts {
restXpackTests(new File(projectDir, "src/yamlRestTest/resources/rest-api-spec/test"))
}
@@ -46,7 +46,7 @@ public static CuVSResources cuVSResourcesOrNull(boolean logError) {
if (t instanceof ExceptionInInitializerError ex) {
t = ex.getCause();
}
LOG.warn("Exception occurred during creation of cuvs resources. " + t);
LOG.warn("Exception occurred during creation of cuvs resources", t);
}
}
return null;
@@ -7,7 +7,7 @@

package org.elasticsearch.xpack.gpu.codec;

import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.CuVSMatrix;

import org.apache.lucene.store.MemorySegmentAccessInput;

@@ -20,6 +20,5 @@ static DatasetUtils getInstance() {
}

/** Returns a CuVSMatrix over the float32 vectors in the input. */
Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException;

CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException;
}
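
For orientation, a minimal caller-side sketch of the renamed API (hypothetical, not part of this change); the input and sizes come from wherever the raw float32 vectors live, and the returned CuVSMatrix replaces the removed Dataset type:

// Hypothetical usage sketch; assumes `input` wraps raw float32 vector data.
static CuVSMatrix loadVectors(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
    // getInstance() resolves to the platform-specific implementation,
    // or to the stub below on platforms without GPU support
    return DatasetUtils.getInstance().fromInput(input, numVectors, dims);
}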
@@ -7,12 +7,10 @@

package org.elasticsearch.xpack.gpu.codec;

import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.CuVSMatrix;

import org.apache.lucene.store.MemorySegmentAccessInput;

import java.io.IOException;

/** Stub holder - never executed. */
public class DatasetUtilsImpl implements DatasetUtils {

@@ -21,7 +19,7 @@ static DatasetUtils getInstance() {
}

@Override
public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) {
throw new UnsupportedOperationException("should not reach here");
}
}
@@ -9,7 +9,7 @@

import com.nvidia.cuvs.CagraIndex;
import com.nvidia.cuvs.CagraIndexParams;
import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.CuVSMatrix;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.KnnFieldVectorsWriter;
@@ -35,7 +35,6 @@
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
import org.elasticsearch.common.lucene.store.IndexOutputOutputStream;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.logging.LogManager;
@@ -177,21 +176,21 @@ public long ramBytesUsed() {
}

private static final class DatasetOrVectors {
private final Dataset dataset;
private final CuVSMatrix dataset;
private final float[][] vectors;

static DatasetOrVectors fromArray(float[][] vectors) {
return new DatasetOrVectors(
vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : Dataset.ofArray(vectors),
vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : CuVSMatrix.ofArray(vectors),
vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? vectors : null
);
}

static DatasetOrVectors fromDataset(Dataset dataset) {
static DatasetOrVectors fromDataset(CuVSMatrix dataset) {
return new DatasetOrVectors(dataset, null);
}

private DatasetOrVectors(Dataset dataset, float[][] vectors) {
private DatasetOrVectors(CuVSMatrix dataset, float[][] vectors) {
this.dataset = dataset;
this.vectors = vectors;
validateState();
@@ -204,10 +203,10 @@ private void validateState() {
}

int size() {
return dataset != null ? dataset.size() : vectors.length;
return dataset != null ? (int) dataset.size() : vectors.length;
}

Dataset getDataset() {
CuVSMatrix getDataset() {
return dataset;
}

@@ -243,9 +242,16 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV
}
mockGraph = writeGraph(vectors, graphLevelNodeOffsets);
} else {
String tempCagraHNSWFileName = buildGPUIndex(fieldInfo.getVectorSimilarityFunction(), datasetOrVectors.dataset);
assert tempCagraHNSWFileName != null : "GPU index should be built for field: " + fieldInfo.name;
mockGraph = writeGraph(tempCagraHNSWFileName, graphLevelNodeOffsets);
var dataset = datasetOrVectors.dataset;
var cuVSResources = cuVSResourceManager.acquire((int) dataset.size(), (int) dataset.columns());
try {
try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) {
assert index != null : "GPU index should be built for field: " + fieldInfo.name;
mockGraph = writeGraph(index.getGraph(), graphLevelNodeOffsets);
}
} finally {
cuVSResourceManager.release(cuVSResources);
}
}
long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset;
writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetOrVectors.size(), mockGraph, graphLevelNodeOffsets);
@@ -256,8 +262,11 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV
}
}

@SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)")
private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Dataset dataset) throws Throwable {
private CagraIndex buildGPUIndex(
CuVSResourceManager.ManagedCuVSResources cuVSResources,
VectorSimilarityFunction similarityFunction,
CuVSMatrix dataset
) throws Throwable {
CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) {
case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded;
case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct;
@@ -271,134 +280,50 @@ private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Datase
.withMetric(distanceType)
.build();

var cuVSResources = cuVSResourceManager.acquire(dataset.size(), dataset.dimensions());
try {
long startTime = System.nanoTime();
var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params);
var index = indexBuilder.build();
cuVSResourceManager.finishedComputation(cuVSResources);
if (logger.isDebugEnabled()) {
logger.debug(
"Carga index created in: {} ms; #num vectors: {}",
(System.nanoTime() - startTime) / 1_000_000.0,
dataset.size()
);
}

// TODO: do serialization through MemorySegment instead of a temp file
// serialize index for CPU consumption to the hnswlib format
startTime = System.nanoTime();
IndexOutput tempCagraHNSW = null;
boolean success = false;
try {
tempCagraHNSW = segmentWriteState.directory.createTempOutput(
vectorIndex.getName(),
"cagra_hnws_temp",
segmentWriteState.context
);
var tempCagraHNSWOutputStream = new IndexOutputOutputStream(tempCagraHNSW);
index.serializeToHNSW(tempCagraHNSWOutputStream);
if (logger.isDebugEnabled()) {
logger.debug("Carga index serialized to hnswlib format in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
}
success = true;
} finally {
index.destroyIndex();
if (success) {
org.elasticsearch.core.IOUtils.close(tempCagraHNSW);
} else {
if (tempCagraHNSW != null) {
IOUtils.closeWhileHandlingException(tempCagraHNSW);
org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName());
}
}
}
return tempCagraHNSW.getName();
} finally {
cuVSResourceManager.release(cuVSResources);
long startTime = System.nanoTime();
var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params);
var index = indexBuilder.build();
cuVSResourceManager.finishedComputation(cuVSResources);
if (logger.isDebugEnabled()) {
logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, dataset.size());
}
return index;
}

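Taken together with writeFieldInternal above, the resource manager now sees a three-step protocol around each GPU build. A condensed, illustrative sketch of that contract (variable names abbreviated; the pooling behavior described in the comments is an assumption about the manager, not spelled out in this diff):

// Illustrative sketch of the CuVSResourceManager protocol used above.
var res = cuVSResourceManager.acquire(numVectors, dims);
try {
    var index = CagraIndex.newBuilder(res).withDataset(dataset).withIndexParams(params).build();
    cuVSResourceManager.finishedComputation(res);  // GPU compute done, resources still held
    // ... read index.getGraph() and serialize it on the CPU side ...
    index.close();                                 // try-with-resources in the real code
} finally {
    cuVSResourceManager.release(res);              // always return resources to the manager
}
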
@SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)")
private HnswGraph writeGraph(String tempCagraHNSWFileName, int[][] levelNodeOffsets) throws IOException {
private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) throws IOException {
long startTime = System.nanoTime();
boolean success = false;
IndexInput tempCagraHNSWInput = null;
int maxElementCount;
int maxGraphDegree;

try {
tempCagraHNSWInput = segmentWriteState.directory.openInput(tempCagraHNSWFileName, segmentWriteState.context);
// read the metadata from the hnswlib format;
// some of it is not used in the Lucene HNSW format
tempCagraHNSWInput.readLong(); // offSetLevel0
maxElementCount = (int) tempCagraHNSWInput.readLong();
tempCagraHNSWInput.readLong(); // currElementCount
tempCagraHNSWInput.readLong(); // sizeDataPerElement
long labelOffset = tempCagraHNSWInput.readLong();
long dataOffset = tempCagraHNSWInput.readLong();
int maxLevel = tempCagraHNSWInput.readInt();
tempCagraHNSWInput.readInt(); // entryPointNode
tempCagraHNSWInput.readLong(); // maxM
long maxM0 = tempCagraHNSWInput.readLong(); // number of graph connections
tempCagraHNSWInput.readLong(); // M
tempCagraHNSWInput.readLong(); // mult
tempCagraHNSWInput.readLong(); // efConstruction

assert (maxLevel == 1) : "Cagra index is flat, maxLevel must be: 1, got: " + maxLevel;
maxGraphDegree = (int) maxM0;
int[] neighbors = new int[maxGraphDegree];
int dimension = (int) ((labelOffset - dataOffset) / Float.BYTES);
// assert (dimension == dimensionCalculated)
// : "Cagra index vector dimension must be: " + dimension + ", got: " + dimensionCalculated;

levelNodeOffsets[0] = new int[maxElementCount];

// read graph from the cagra_hnswlib index and write it to the Lucene vectorIndex file
int[] scratch = new int[maxGraphDegree];
for (int node = 0; node < maxElementCount; node++) {
// read from the cagra_hnswlib index
int nodeDegree = tempCagraHNSWInput.readInt();
assert (nodeDegree == maxGraphDegree)
: "In a Cagra graph all nodes must have the same number of connections: " + maxGraphDegree + ", got " + nodeDegree;
for (int i = 0; i < nodeDegree; i++) {
neighbors[i] = tempCagraHNSWInput.readInt();
}
// Skip over the vector data
tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + dimension * Float.BYTES);
// Skip over the label/id
tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + Long.BYTES);

// write to the Lucene vectorIndex file
long offsetStart = vectorIndex.getFilePointer();
Arrays.sort(neighbors);
int actualSize = 0;
scratch[actualSize++] = neighbors[0];
for (int i = 1; i < nodeDegree; i++) {
assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount;
if (neighbors[i - 1] == neighbors[i]) {
continue;
}
scratch[actualSize++] = neighbors[i] - neighbors[i - 1];
}
// Write the size after duplicates are removed
vectorIndex.writeVInt(actualSize);
for (int i = 0; i < actualSize; i++) {
vectorIndex.writeVInt(scratch[i]);
int maxElementCount = (int) cagraGraph.size();
int maxGraphDegree = (int) cagraGraph.columns();
int[] neighbors = new int[maxGraphDegree];

levelNodeOffsets[0] = new int[maxElementCount];
// write the cagra graph to the Lucene vectorIndex file
int[] scratch = new int[maxGraphDegree];
for (int node = 0; node < maxElementCount; node++) {
cagraGraph.getRow(node).toArray(neighbors);

// write to the Lucene vectorIndex file
long offsetStart = vectorIndex.getFilePointer();
Arrays.sort(neighbors);
int actualSize = 0;
scratch[actualSize++] = neighbors[0];
for (int i = 1; i < maxGraphDegree; i++) {
assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount;
if (neighbors[i - 1] == neighbors[i]) {
continue;
}
levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart);
}
if (logger.isDebugEnabled()) {
logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
scratch[actualSize++] = neighbors[i] - neighbors[i - 1];
}
success = true;
} finally {
if (success) {
IOUtils.close(tempCagraHNSWInput);
} else {
IOUtils.closeWhileHandlingException(tempCagraHNSWInput);
// Write the size after duplicates are removed
vectorIndex.writeVInt(actualSize);
for (int i = 0; i < actualSize; i++) {
vectorIndex.writeVInt(scratch[i]);
}
org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSWFileName);
levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart);
}
if (logger.isDebugEnabled()) {
logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
}
return createMockGraph(maxElementCount, maxGraphDegree);
}
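
The adjacency encoding above is: sort, drop duplicates, then write the list as vInts with the first neighbor absolute and each subsequent value as a gap. A hypothetical reader-side sketch that inverts it (the IndexInput is assumed to be positioned at a node's neighbor list; not part of this change):

// Hypothetical decoder for the delta-encoded neighbor lists written above.
static int[] readNeighbors(IndexInput in) throws IOException {
    int size = in.readVInt();              // count after duplicate removal
    int[] neighbors = new int[size];
    int prev = 0;
    for (int i = 0; i < size; i++) {
        int gap = in.readVInt();
        neighbors[i] = (i == 0) ? gap : prev + gap;  // first value is absolute
        prev = neighbors[i];
    }
    return neighbors;
}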
@@ -7,7 +7,7 @@

package org.elasticsearch.xpack.gpu.codec;

import com.nvidia.cuvs.Dataset;
import com.nvidia.cuvs.CuVSMatrix;
import com.nvidia.cuvs.spi.CuVSProvider;

import org.apache.lucene.store.MemorySegmentAccessInput;
@@ -20,15 +20,15 @@ public class DatasetUtilsImpl implements DatasetUtils {

private static final DatasetUtils INSTANCE = new DatasetUtilsImpl();

private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeDatasetBuilder();
private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeMatrixBuilder();

static DatasetUtils getInstance() {
return INSTANCE;
}

static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dimensions) {
static CuVSMatrix fromMemorySegment(MemorySegment memorySegment, int size, int dimensions, CuVSMatrix.DataType dataType) {
try {
return (Dataset) createDataset$mh.invokeExact(memorySegment, size, dimensions);
return (CuVSMatrix) createDataset$mh.invokeExact(memorySegment, size, dimensions, dataType);
} catch (Throwable e) {
if (e instanceof Error err) {
throw err;
@@ -43,7 +43,7 @@ static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dime
private DatasetUtilsImpl() {}

@Override
public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
if (numVectors < 0 || dims < 0) {
throwIllegalArgumentException(numVectors, dims);
}
@@ -52,7 +52,7 @@ public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dim
if (((long) numVectors * dims * Float.BYTES) > ms.byteSize()) {
throwIllegalArgumentException(ms, numVectors, dims);
}
return fromMemorySegment(ms, numVectors, dims);
return fromMemorySegment(ms, numVectors, dims, CuVSMatrix.DataType.FLOAT);
}

static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) {
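
For context, a hypothetical caller of the new four-argument fromMemorySegment, filling a tiny off-heap segment by hand (the Arena allocation is an illustrative assumption, and java.lang.foreign imports for Arena, MemorySegment, and ValueLayout are assumed; the production path goes through fromInput as shown above):

// Hypothetical usage of fromMemorySegment; illustrative only.
try (Arena arena = Arena.ofConfined()) {
    int numVectors = 2, dims = 2;
    MemorySegment seg = arena.allocate((long) numVectors * dims * Float.BYTES);
    float[] values = {1f, 2f, 3f, 4f};     // two row-major float32 vectors
    MemorySegment.copy(values, 0, seg, ValueLayout.JAVA_FLOAT, 0, values.length);
    CuVSMatrix matrix = DatasetUtilsImpl.fromMemorySegment(seg, numVectors, dims, CuVSMatrix.DataType.FLOAT);
    // use the matrix while the backing segment is still alive
    System.out.println(matrix.size() + " x " + matrix.columns());
}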