Skip to content

Commit e6e9c17

Browse files
authored
Use new cuvs version 25.10; direct access to cagra graph via CuVSMatrix (#132832)
This PR updates the cuvs-java dependency to 25.10 (I left 25.08 in place and updated its verification metadata to the final version, for convenience in case we want to go back). It uses CuVSMatrix to transfer data efficiently from GPU memory directly to the Java heap (and then to a Lucene file). I tried to keep changes to a minimum, but some restructuring was necessary (e.g. resource management needs to be done at a higher level: we need to hold on to the resource until we have finished reading the CuVSMatrix).
1 parent 446fba7 commit e6e9c17

File tree

9 files changed

+78
-153
lines changed

9 files changed

+78
-153
lines changed

gradle/verification-metadata.xml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
<trust group="org.elasticsearch.plugin"/>
1616
<trust file=".*-javadoc[.]jar" regex="true"/>
1717
<trust file=".*-sources[.]jar" regex="true"/>
18+
19+
<!-- This is here because we are currently using nightly builds, and the checksum keeps changing -->
20+
<!-- TODO: move to a proper entry when the official cuvs-java is released -->
21+
<trust group="com.nvidia.cuvs" name="cuvs-java" version="25.10.0" />
1822
</trusted-artifacts>
1923
</configuration>
2024
<components>
@@ -1130,7 +1134,7 @@
11301134
</component>
11311135
<component group="com.nvidia.cuvs" name="cuvs-java" version="25.08.0">
11321136
<artifact name="cuvs-java-25.08.0.jar">
1133-
<sha256 value="edec77b7b3cc20d7cc32f97a66f50bf805ed3c88eac12f1a6b43cdabb062a007" origin="Generated by Gradle"/>
1137+
<sha256 value="d7ae03068bb58e71c3f6b7cb05fde1665bb0121a9daf0b6ce86502bc1a5829d5" origin="Generated by Gradle"/>
11341138
</artifact>
11351139
</component>
11361140
<component group="com.perforce" name="p4java" version="2015.2.1365273">

x-pack/plugin/gpu/build.gradle

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,12 @@ repositories {
2020
dependencies {
2121
compileOnly project(path: xpackModule('core'))
2222
compileOnly project(':server')
23-
implementation 'com.nvidia.cuvs:cuvs-java:25.08.0'
23+
implementation 'com.nvidia.cuvs:cuvs-java:25.10.0'
2424
testImplementation(testArtifact(project(xpackModule('core'))))
2525
testImplementation(testArtifact(project(':server')))
2626
clusterModules project(xpackModule('gpu'))
2727
}
28-
tasks.named("yamlRestTest") {
29-
usesDefaultDistribution("uses gpu plugin")
30-
}
28+
3129
artifacts {
3230
restXpackTests(new File(projectDir, "src/yamlRestTest/resources/rest-api-spec/test"))
3331
}

x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public static CuVSResources cuVSResourcesOrNull(boolean logError) {
4646
if (t instanceof ExceptionInInitializerError ex) {
4747
t = ex.getCause();
4848
}
49-
LOG.warn("Exception occurred during creation of cuvs resources. " + t);
49+
LOG.warn("Exception occurred during creation of cuvs resources", t);
5050
}
5151
}
5252
return null;

x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
package org.elasticsearch.xpack.gpu.codec;
99

10-
import com.nvidia.cuvs.Dataset;
10+
import com.nvidia.cuvs.CuVSMatrix;
1111

1212
import org.apache.lucene.store.MemorySegmentAccessInput;
1313

@@ -20,6 +20,5 @@ static DatasetUtils getInstance() {
2020
}
2121

2222
/** Returns a CuVSMatrix over the float32 vectors in the input. */
23-
Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException;
24-
23+
CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException;
2524
}

x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,10 @@
77

88
package org.elasticsearch.xpack.gpu.codec;
99

10-
import com.nvidia.cuvs.Dataset;
10+
import com.nvidia.cuvs.CuVSMatrix;
1111

1212
import org.apache.lucene.store.MemorySegmentAccessInput;
1313

14-
import java.io.IOException;
15-
1614
/** Stub holder - never executed. */
1715
public class DatasetUtilsImpl implements DatasetUtils {
1816

@@ -21,7 +19,7 @@ static DatasetUtils getInstance() {
2119
}
2220

2321
@Override
24-
public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
22+
public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) {
2523
throw new UnsupportedOperationException("should not reach here");
2624
}
2725
}

x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java

Lines changed: 58 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
import com.nvidia.cuvs.CagraIndex;
1111
import com.nvidia.cuvs.CagraIndexParams;
12-
import com.nvidia.cuvs.Dataset;
12+
import com.nvidia.cuvs.CuVSMatrix;
1313

1414
import org.apache.lucene.codecs.CodecUtil;
1515
import org.apache.lucene.codecs.KnnFieldVectorsWriter;
@@ -35,7 +35,6 @@
3535
import org.apache.lucene.util.hnsw.HnswGraph;
3636
import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator;
3737
import org.apache.lucene.util.packed.DirectMonotonicWriter;
38-
import org.elasticsearch.common.lucene.store.IndexOutputOutputStream;
3938
import org.elasticsearch.core.IOUtils;
4039
import org.elasticsearch.core.SuppressForbidden;
4140
import org.elasticsearch.logging.LogManager;
@@ -177,21 +176,21 @@ public long ramBytesUsed() {
177176
}
178177

179178
private static final class DatasetOrVectors {
180-
private final Dataset dataset;
179+
private final CuVSMatrix dataset;
181180
private final float[][] vectors;
182181

183182
static DatasetOrVectors fromArray(float[][] vectors) {
184183
return new DatasetOrVectors(
185-
vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : Dataset.ofArray(vectors),
184+
vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : CuVSMatrix.ofArray(vectors),
186185
vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? vectors : null
187186
);
188187
}
189188

190-
static DatasetOrVectors fromDataset(Dataset dataset) {
189+
static DatasetOrVectors fromDataset(CuVSMatrix dataset) {
191190
return new DatasetOrVectors(dataset, null);
192191
}
193192

194-
private DatasetOrVectors(Dataset dataset, float[][] vectors) {
193+
private DatasetOrVectors(CuVSMatrix dataset, float[][] vectors) {
195194
this.dataset = dataset;
196195
this.vectors = vectors;
197196
validateState();
@@ -204,10 +203,10 @@ private void validateState() {
204203
}
205204

206205
int size() {
207-
return dataset != null ? dataset.size() : vectors.length;
206+
return dataset != null ? (int) dataset.size() : vectors.length;
208207
}
209208

210-
Dataset getDataset() {
209+
CuVSMatrix getDataset() {
211210
return dataset;
212211
}
213212

@@ -243,9 +242,16 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV
243242
}
244243
mockGraph = writeGraph(vectors, graphLevelNodeOffsets);
245244
} else {
246-
String tempCagraHNSWFileName = buildGPUIndex(fieldInfo.getVectorSimilarityFunction(), datasetOrVectors.dataset);
247-
assert tempCagraHNSWFileName != null : "GPU index should be built for field: " + fieldInfo.name;
248-
mockGraph = writeGraph(tempCagraHNSWFileName, graphLevelNodeOffsets);
245+
var dataset = datasetOrVectors.dataset;
246+
var cuVSResources = cuVSResourceManager.acquire((int) dataset.size(), (int) dataset.columns());
247+
try {
248+
try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) {
249+
assert index != null : "GPU index should be built for field: " + fieldInfo.name;
250+
mockGraph = writeGraph(index.getGraph(), graphLevelNodeOffsets);
251+
}
252+
} finally {
253+
cuVSResourceManager.release(cuVSResources);
254+
}
249255
}
250256
long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset;
251257
writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetOrVectors.size(), mockGraph, graphLevelNodeOffsets);
@@ -256,8 +262,11 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV
256262
}
257263
}
258264

259-
@SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)")
260-
private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Dataset dataset) throws Throwable {
265+
private CagraIndex buildGPUIndex(
266+
CuVSResourceManager.ManagedCuVSResources cuVSResources,
267+
VectorSimilarityFunction similarityFunction,
268+
CuVSMatrix dataset
269+
) throws Throwable {
261270
CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) {
262271
case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded;
263272
case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct;
@@ -271,134 +280,50 @@ private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Datase
271280
.withMetric(distanceType)
272281
.build();
273282

274-
var cuVSResources = cuVSResourceManager.acquire(dataset.size(), dataset.dimensions());
275-
try {
276-
long startTime = System.nanoTime();
277-
var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params);
278-
var index = indexBuilder.build();
279-
cuVSResourceManager.finishedComputation(cuVSResources);
280-
if (logger.isDebugEnabled()) {
281-
logger.debug(
282-
"Carga index created in: {} ms; #num vectors: {}",
283-
(System.nanoTime() - startTime) / 1_000_000.0,
284-
dataset.size()
285-
);
286-
}
287-
288-
// TODO: do serialization through MemorySegment instead of a temp file
289-
// serialize index for CPU consumption to the hnwslib format
290-
startTime = System.nanoTime();
291-
IndexOutput tempCagraHNSW = null;
292-
boolean success = false;
293-
try {
294-
tempCagraHNSW = segmentWriteState.directory.createTempOutput(
295-
vectorIndex.getName(),
296-
"cagra_hnws_temp",
297-
segmentWriteState.context
298-
);
299-
var tempCagraHNSWOutputStream = new IndexOutputOutputStream(tempCagraHNSW);
300-
index.serializeToHNSW(tempCagraHNSWOutputStream);
301-
if (logger.isDebugEnabled()) {
302-
logger.debug("Carga index serialized to hnswlib format in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
303-
}
304-
success = true;
305-
} finally {
306-
index.destroyIndex();
307-
if (success) {
308-
org.elasticsearch.core.IOUtils.close(tempCagraHNSW);
309-
} else {
310-
if (tempCagraHNSW != null) {
311-
IOUtils.closeWhileHandlingException(tempCagraHNSW);
312-
org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName());
313-
}
314-
}
315-
}
316-
return tempCagraHNSW.getName();
317-
} finally {
318-
cuVSResourceManager.release(cuVSResources);
283+
long startTime = System.nanoTime();
284+
var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params);
285+
var index = indexBuilder.build();
286+
cuVSResourceManager.finishedComputation(cuVSResources);
287+
if (logger.isDebugEnabled()) {
288+
logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, dataset.size());
319289
}
290+
return index;
320291
}
321292

322-
@SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)")
323-
private HnswGraph writeGraph(String tempCagraHNSWFileName, int[][] levelNodeOffsets) throws IOException {
293+
private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) throws IOException {
324294
long startTime = System.nanoTime();
325-
boolean success = false;
326-
IndexInput tempCagraHNSWInput = null;
327-
int maxElementCount;
328-
int maxGraphDegree;
329295

330-
try {
331-
tempCagraHNSWInput = segmentWriteState.directory.openInput(tempCagraHNSWFileName, segmentWriteState.context);
332-
// read the metadata from the hnlswlib format;
333-
// some of them are not used in the Lucene HNSW format
334-
tempCagraHNSWInput.readLong(); // offSetLevel0
335-
maxElementCount = (int) tempCagraHNSWInput.readLong();
336-
tempCagraHNSWInput.readLong(); // currElementCount
337-
tempCagraHNSWInput.readLong(); // sizeDataPerElement
338-
long labelOffset = tempCagraHNSWInput.readLong();
339-
long dataOffset = tempCagraHNSWInput.readLong();
340-
int maxLevel = tempCagraHNSWInput.readInt();
341-
tempCagraHNSWInput.readInt(); // entryPointNode
342-
tempCagraHNSWInput.readLong(); // maxM
343-
long maxM0 = tempCagraHNSWInput.readLong(); // number of graph connections
344-
tempCagraHNSWInput.readLong(); // M
345-
tempCagraHNSWInput.readLong(); // mult
346-
tempCagraHNSWInput.readLong(); // efConstruction
347-
348-
assert (maxLevel == 1) : "Cagra index is flat, maxLevel must be: 1, got: " + maxLevel;
349-
maxGraphDegree = (int) maxM0;
350-
int[] neighbors = new int[maxGraphDegree];
351-
int dimension = (int) ((labelOffset - dataOffset) / Float.BYTES);
352-
// assert (dimension == dimensionCalculated)
353-
// : "Cagra index vector dimension must be: " + dimension + ", got: " + dimensionCalculated;
354-
355-
levelNodeOffsets[0] = new int[maxElementCount];
356-
357-
// read graph from the cagra_hnswlib index and write it to the Lucene vectorIndex file
358-
int[] scratch = new int[maxGraphDegree];
359-
for (int node = 0; node < maxElementCount; node++) {
360-
// read from the cagra_hnswlib index
361-
int nodeDegree = tempCagraHNSWInput.readInt();
362-
assert (nodeDegree == maxGraphDegree)
363-
: "In Cagra graph all nodes must have the same number of connections : " + maxGraphDegree + ", got" + nodeDegree;
364-
for (int i = 0; i < nodeDegree; i++) {
365-
neighbors[i] = tempCagraHNSWInput.readInt();
366-
}
367-
// Skip over the vector data
368-
tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + dimension * Float.BYTES);
369-
// Skip over the label/id
370-
tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + Long.BYTES);
371-
372-
// write to the Lucene vectorIndex file
373-
long offsetStart = vectorIndex.getFilePointer();
374-
Arrays.sort(neighbors);
375-
int actualSize = 0;
376-
scratch[actualSize++] = neighbors[0];
377-
for (int i = 1; i < nodeDegree; i++) {
378-
assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount;
379-
if (neighbors[i - 1] == neighbors[i]) {
380-
continue;
381-
}
382-
scratch[actualSize++] = neighbors[i] - neighbors[i - 1];
383-
}
384-
// Write the size after duplicates are removed
385-
vectorIndex.writeVInt(actualSize);
386-
for (int i = 0; i < actualSize; i++) {
387-
vectorIndex.writeVInt(scratch[i]);
296+
int maxElementCount = (int) cagraGraph.size();
297+
int maxGraphDegree = (int) cagraGraph.columns();
298+
int[] neighbors = new int[maxGraphDegree];
299+
300+
levelNodeOffsets[0] = new int[maxElementCount];
301+
// write the cagra graph to the Lucene vectorIndex file
302+
int[] scratch = new int[maxGraphDegree];
303+
for (int node = 0; node < maxElementCount; node++) {
304+
cagraGraph.getRow(node).toArray(neighbors);
305+
306+
// write to the Lucene vectorIndex file
307+
long offsetStart = vectorIndex.getFilePointer();
308+
Arrays.sort(neighbors);
309+
int actualSize = 0;
310+
scratch[actualSize++] = neighbors[0];
311+
for (int i = 1; i < maxGraphDegree; i++) {
312+
assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount;
313+
if (neighbors[i - 1] == neighbors[i]) {
314+
continue;
388315
}
389-
levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart);
390-
}
391-
if (logger.isDebugEnabled()) {
392-
logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
316+
scratch[actualSize++] = neighbors[i] - neighbors[i - 1];
393317
}
394-
success = true;
395-
} finally {
396-
if (success) {
397-
IOUtils.close(tempCagraHNSWInput);
398-
} else {
399-
IOUtils.closeWhileHandlingException(tempCagraHNSWInput);
318+
// Write the size after duplicates are removed
319+
vectorIndex.writeVInt(actualSize);
320+
for (int i = 0; i < actualSize; i++) {
321+
vectorIndex.writeVInt(scratch[i]);
400322
}
401-
org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSWFileName);
323+
levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart);
324+
}
325+
if (logger.isDebugEnabled()) {
326+
logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
402327
}
403328
return createMockGraph(maxElementCount, maxGraphDegree);
404329
}

x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
package org.elasticsearch.xpack.gpu.codec;
99

10-
import com.nvidia.cuvs.Dataset;
10+
import com.nvidia.cuvs.CuVSMatrix;
1111
import com.nvidia.cuvs.spi.CuVSProvider;
1212

1313
import org.apache.lucene.store.MemorySegmentAccessInput;
@@ -20,15 +20,15 @@ public class DatasetUtilsImpl implements DatasetUtils {
2020

2121
private static final DatasetUtils INSTANCE = new DatasetUtilsImpl();
2222

23-
private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeDatasetBuilder();
23+
private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeMatrixBuilder();
2424

2525
static DatasetUtils getInstance() {
2626
return INSTANCE;
2727
}
2828

29-
static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dimensions) {
29+
static CuVSMatrix fromMemorySegment(MemorySegment memorySegment, int size, int dimensions, CuVSMatrix.DataType dataType) {
3030
try {
31-
return (Dataset) createDataset$mh.invokeExact(memorySegment, size, dimensions);
31+
return (CuVSMatrix) createDataset$mh.invokeExact(memorySegment, size, dimensions, dataType);
3232
} catch (Throwable e) {
3333
if (e instanceof Error err) {
3434
throw err;
@@ -43,7 +43,7 @@ static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dime
4343
private DatasetUtilsImpl() {}
4444

4545
@Override
46-
public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
46+
public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException {
4747
if (numVectors < 0 || dims < 0) {
4848
throwIllegalArgumentException(numVectors, dims);
4949
}
@@ -52,7 +52,7 @@ public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dim
5252
if (((long) numVectors * dims * Float.BYTES) > ms.byteSize()) {
5353
throwIllegalArgumentException(ms, numVectors, dims);
5454
}
55-
return fromMemorySegment(ms, numVectors, dims);
55+
return fromMemorySegment(ms, numVectors, dims, CuVSMatrix.DataType.FLOAT);
5656
}
5757

5858
static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) {

0 commit comments

Comments
 (0)