From c56c7074faec022676d64816f1eba59a25212827 Mon Sep 17 00:00:00 2001 From: ldematte Date: Fri, 10 Oct 2025 14:28:49 +0200 Subject: [PATCH 1/6] Use the internal raw vector data during merge, avoid additional tmp file --- server/src/main/java/module-info.java | 1 + .../reflect/VectorsFormatReflectionUtils.java | 102 ++++++ .../gpu/codec/ES92GpuHnswVectorsWriter.java | 342 +++++++++++------- .../xpack/gpu/codec/ResourcesHolder.java | 40 ++ 4 files changed, 354 insertions(+), 131 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ResourcesHolder.java diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 2987b3849e663..3b9aba2dcce2c 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -490,6 +490,7 @@ exports org.elasticsearch.lucene.util.automaton; exports org.elasticsearch.index.codec.perfield; exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn, org.elasticsearch.gpu; + exports org.elasticsearch.index.codec.vectors.reflect to org.elasticsearch.gpu; exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn; exports org.elasticsearch.inference.telemetry; exports org.elasticsearch.index.codec.vectors.diskbbq to org.elasticsearch.test.knn; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java new file mode 100644 index 0000000000000..974ba740c8e35 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java @@ -0,0 +1,102 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.reflect; + +import org.apache.lucene.codecs.lucene95.HasIndexSlice; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier; +import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; +import org.apache.lucene.util.quantization.QuantizedByteVectorValues; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; + +public class VectorsFormatReflectionUtils { + private static final VarHandle FLOAT_SUPPLIER_HANDLE; + private static final VarHandle BYTE_SUPPLIER_HANDLE; + private static final VarHandle FLOAT_VECTORS_HANDLE; + private static final VarHandle BYTE_VECTORS_HANDLE; + + private static final Class FLAT_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS; + private static final Class SCALAR_QUANTIZED_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS; + private static final Class FLOAT_SCORING_SUPPLIER_CLASS; + private static final Class BYTE_SCORING_SUPPLIER_CLASS; + static { + try { + FLAT_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS = Class.forName( + "org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter$FlatCloseableRandomVectorScorerSupplier" + ); + var lookup = MethodHandles.privateLookupIn(FLAT_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS, MethodHandles.lookup()); + FLOAT_SUPPLIER_HANDLE = lookup.findVarHandle( + FLAT_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS, + "supplier", + RandomVectorScorerSupplier.class + ); + + FLOAT_SCORING_SUPPLIER_CLASS = Class.forName( + "org.apache.lucene.internal.vectorization.Lucene99MemorySegmentFloatVectorScorerSupplier" + ); + lookup = MethodHandles.privateLookupIn(FLOAT_SCORING_SUPPLIER_CLASS, MethodHandles.lookup()); + FLOAT_VECTORS_HANDLE = lookup.findVarHandle(FLOAT_SCORING_SUPPLIER_CLASS, "values", FloatVectorValues.class); + + SCALAR_QUANTIZED_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS = Class.forName( + "org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter$ScalarQuantizedCloseableRandomVectorScorerSupplier" + ); + lookup = MethodHandles.privateLookupIn(SCALAR_QUANTIZED_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS, MethodHandles.lookup()); + BYTE_SUPPLIER_HANDLE = lookup.findVarHandle( + SCALAR_QUANTIZED_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS, + "supplier", + RandomVectorScorerSupplier.class + ); + + BYTE_SCORING_SUPPLIER_CLASS = Class.forName("org.elasticsearch.simdvec.internal.Int7SQVectorScorerSupplier"); + lookup = MethodHandles.privateLookupIn(BYTE_SCORING_SUPPLIER_CLASS, MethodHandles.lookup()); + BYTE_VECTORS_HANDLE = lookup.findVarHandle(BYTE_SCORING_SUPPLIER_CLASS, "values", QuantizedByteVectorValues.class); + + } catch (IllegalAccessException e) { + throw new AssertionError("should not happen, check opens", e); + } catch (ReflectiveOperationException e) { + throw new AssertionError(e); + } + } + + public static RandomVectorScorerSupplier getFlatRandomVectorScorerInnerSupplier(CloseableRandomVectorScorerSupplier scorerSupplier) { + if (scorerSupplier.getClass().equals(FLAT_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS)) { + return (RandomVectorScorerSupplier) FLOAT_SUPPLIER_HANDLE.get(scorerSupplier); + } + return null; + } + + public static RandomVectorScorerSupplier getScalarQuantizedRandomVectorScorerInnerSupplier( + CloseableRandomVectorScorerSupplier scorerSupplier + ) { + if (scorerSupplier.getClass().equals(SCALAR_QUANTIZED_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS)) { + return (RandomVectorScorerSupplier) BYTE_SUPPLIER_HANDLE.get(scorerSupplier); + } + return null; + } + + public static HasIndexSlice getFloatScoringSupplierVectorOrNull(RandomVectorScorerSupplier scorerSupplier) { + if (FLOAT_SCORING_SUPPLIER_CLASS.isAssignableFrom(scorerSupplier.getClass())) { + var vectorValues = FLOAT_VECTORS_HANDLE.get(scorerSupplier); + if (vectorValues instanceof HasIndexSlice indexSlice) { + return indexSlice; + } + } + return null; + } + + public static HasIndexSlice getByteScoringSupplierVectorOrNull(RandomVectorScorerSupplier scorerSupplier) { + if (BYTE_SCORING_SUPPLIER_CLASS.isAssignableFrom(scorerSupplier.getClass())) { + return (QuantizedByteVectorValues) BYTE_VECTORS_HANDLE.get(scorerSupplier); + } + return null; + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java index 3916afe77caf9..f415485094faa 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java @@ -18,7 +18,6 @@ import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter; import org.apache.lucene.index.ByteVectorValues; -import org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexFileNames; @@ -28,26 +27,23 @@ import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.lucene.store.Directory; import org.apache.lucene.store.FilterIndexInput; -import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.MemorySegmentAccessInput; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.hnsw.HnswGraph; import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator; +import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; import org.apache.lucene.util.packed.DirectMonotonicWriter; import org.apache.lucene.util.quantization.ScalarQuantizer; import org.elasticsearch.core.IOUtils; -import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; +import org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -71,6 +67,7 @@ final class ES92GpuHnswVectorsWriter extends KnnVectorsWriter { private static final Logger logger = LogManager.getLogger(ES92GpuHnswVectorsWriter.class); private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ES92GpuHnswVectorsWriter.class); private static final int LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT = 16; + private static final long DIRECT_COPY_THRESHOLD_IN_BYTES = 128 * 1024 * 1024; // 128MB private final CuVSResourceManager cuVSResourceManager; private final SegmentWriteState segmentWriteState; @@ -191,10 +188,14 @@ private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IO // Will not be indexed on the GPU flushFieldWithMockGraph(fieldInfo, numVectors, sortMap); } else { - var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT); - try { + try ( + var resourcesHolder = new ResourcesHolder( + cuVSResourceManager, + cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT) + ) + ) { var builder = CuVSMatrix.deviceBuilder( - cuVSResources, + resourcesHolder.resources(), numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT @@ -203,10 +204,8 @@ private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IO builder.addVector(vector); } try (var dataset = builder.build()) { - flushFieldWithGpuGraph(cuVSResources, fieldInfo, dataset, sortMap); + flushFieldWithGpuGraph(resourcesHolder, fieldInfo, dataset, sortMap); } - } finally { - cuVSResourceManager.release(cuVSResources); } } var elapsed = started - System.nanoTime(); @@ -223,17 +222,13 @@ private void flushFieldWithMockGraph(FieldInfo fieldInfo, int numVectors, Sorter } } - private void flushFieldWithGpuGraph( - CuVSResourceManager.ManagedCuVSResources resources, - FieldInfo fieldInfo, - CuVSMatrix dataset, - Sorter.DocMap sortMap - ) throws IOException { + private void flushFieldWithGpuGraph(ResourcesHolder resourcesHolder, FieldInfo fieldInfo, CuVSMatrix dataset, Sorter.DocMap sortMap) + throws IOException { if (sortMap == null) { - generateGpuGraphAndWriteMeta(resources, fieldInfo, dataset); + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, dataset); } else { // TODO: use sortMap - generateGpuGraphAndWriteMeta(resources, fieldInfo, dataset); + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, dataset); } } @@ -265,20 +260,27 @@ public long ramBytesUsed() { return total; } - private void generateGpuGraphAndWriteMeta( - CuVSResourceManager.ManagedCuVSResources cuVSResources, - FieldInfo fieldInfo, - CuVSMatrix dataset - ) throws IOException { + private void generateGpuGraphAndWriteMeta(ResourcesHolder resourcesHolder, FieldInfo fieldInfo, CuVSMatrix dataset) throws IOException { try { assert dataset.size() >= MIN_NUM_VECTORS_FOR_GPU_BUILD; long vectorIndexOffset = vectorIndex.getFilePointer(); int[][] graphLevelNodeOffsets = new int[1][]; final HnswGraph graph; - try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) { + try (var index = buildGPUIndex(resourcesHolder.resources(), fieldInfo.getVectorSimilarityFunction(), dataset)) { assert index != null : "GPU index should be built for field: " + fieldInfo.name; - graph = writeGraph(index.getGraph(), graphLevelNodeOffsets); + var deviceGraph = index.getGraph(); + var graphSize = deviceGraph.size() * deviceGraph.columns() * Integer.BYTES; + if (graphSize < DIRECT_COPY_THRESHOLD_IN_BYTES) { + // If the graph is "small enough", copy it entirely to host memory so we can + // release the associated resource early and increase parallelism. + try (var hostGraph = index.getGraph().toHost()) { + resourcesHolder.close(); + graph = writeGraph(hostGraph, graphLevelNodeOffsets); + } + } else { + graph = writeGraph(deviceGraph, graphLevelNodeOffsets); + } } long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, (int) dataset.size(), graph, graphLevelNodeOffsets); @@ -444,97 +446,205 @@ public NodesIterator getNodesOnLevel(int level) { }; } - @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") - private static void deleteFilesIgnoringExceptions(Directory dir, String fileName) { - org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(dir, fileName); - } - // TODO check with deleted documents @Override // fix sorted index case public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { - var started = System.nanoTime(); - flatVectorWriter.mergeOneField(fieldInfo, mergeState); - final int numVectors; - String tempRawVectorsFileName = null; - boolean success = false; - // save merged vector values to a temp file - try (IndexOutput out = mergeState.segmentInfo.dir.createTempOutput(mergeState.segmentInfo.name, "vec_", IOContext.DEFAULT)) { - tempRawVectorsFileName = out.getName(); - if (dataType == CuVSMatrix.DataType.BYTE) { - numVectors = writeByteVectorValues(out, getMergedByteVectorValues(fieldInfo, mergeState)); + try (var scorerSupplier = flatVectorWriter.mergeOneFieldToIndex(fieldInfo, mergeState)) { + var started = System.nanoTime(); + int numVectors = scorerSupplier.totalVectorCount(); + if (numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD) { + // we don't really need real value for vectors here, + // we just build a mock graph where every node is connected to every other node + generateMockGraphAndWriteMeta(fieldInfo, numVectors); } else { - numVectors = writeFloatVectorValues(fieldInfo, out, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState)); + if (dataType == CuVSMatrix.DataType.FLOAT) { + var randomScorerSupplier = VectorsFormatReflectionUtils.getFlatRandomVectorScorerInnerSupplier(scorerSupplier); + mergeFloatVectorField(fieldInfo, mergeState, randomScorerSupplier, numVectors); + } else { + // During merging, we use quantized data, so we need to support byte[] too. + // That's how our current formats work: use floats during indexing, and quantized data to build a graph + // during merging. + assert dataType == CuVSMatrix.DataType.BYTE; + var randomScorerSupplier = VectorsFormatReflectionUtils.getScalarQuantizedRandomVectorScorerInnerSupplier( + scorerSupplier + ); + mergeByteVectorField(fieldInfo, mergeState, randomScorerSupplier, numVectors); + } } - CodecUtil.writeFooter(out); - success = true; - } finally { - if (success == false && tempRawVectorsFileName != null) { - deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); + var elapsed = started - System.nanoTime(); + logger.debug("Merged [{}] vectors in [{}ms]", numVectors, elapsed / 1_000_000.0); + } catch (Throwable t) { + throw new IOException("Failed to merge GPU index: ", t); + } + } + + private void mergeByteVectorField( + FieldInfo fieldInfo, + MergeState mergeState, + RandomVectorScorerSupplier randomScorerSupplier, + int numVectors + ) throws IOException, InterruptedException { + var vectorValues = randomScorerSupplier == null + ? null + : VectorsFormatReflectionUtils.getByteScoringSupplierVectorOrNull(randomScorerSupplier); + if (vectorValues != null) { + IndexInput slice = vectorValues.getSlice(); + var input = FilterIndexInput.unwrapOnlyTest(slice); + if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { + // Direct access to mmapped file + // TODO: strides!! + // for int8_hnsw, the raw vector data has extra 4-byte at the end of each vector to encode a correction constant + int rowStride = fieldInfo.getVectorDimension() + 4; + try ( + var dataset = DatasetUtils.getInstance() + .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), rowStride, -1, dataType); + var resourcesHolder = new ResourcesHolder( + cuVSResourceManager, + cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType) + ); + // Explicitly copy the dataset to GPU memory. The current (25.10) CAGRA index implementation has + // problems with strides; the explicit copy removes the stride while copying. + // Note that this is _not_ an additional copy: input data needs to be moved to GPU memory anyway, + // we are just doing it explicitly instead of relying on CagraIndex#build to do it. + var deviceDataSet = dataset.toDevice(resourcesHolder.resources()) + ) { + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, deviceDataSet); + } + } else { + logger.debug( + () -> "Cannot mmap merged raw vectors temporary file. IndexInput type [" + input.getClass().getSimpleName() + "]" + ); + + try ( + var resourcesHolder = new ResourcesHolder( + cuVSResourceManager, + cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType) + ) + ) { + // Read vector-by-vector + var builder = CuVSMatrix.deviceBuilder( + resourcesHolder.resources(), + numVectors, + fieldInfo.getVectorDimension(), + dataType + ); + + byte[] vector = new byte[fieldInfo.getVectorDimension()]; + for (int i = 0; i < numVectors; ++i) { + input.readBytes(vector, 0, fieldInfo.getVectorDimension()); + builder.addVector(vector); + } + + try (var dataset = builder.build()) { + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, dataset); + } + } + } + } else { + logger.debug("Cannot get merged raw vectors from scorer."); + var byteVectorValues = getMergedByteVectorValues(fieldInfo, mergeState); + try ( + var resourcesHolder = new ResourcesHolder( + cuVSResourceManager, + cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType) + ) + ) { + // Read vector-by-vector + final var builder = CuVSMatrix.deviceBuilder( + resourcesHolder.resources(), + numVectors, + fieldInfo.getVectorDimension(), + dataType + ); + final KnnVectorValues.DocIndexIterator iterator = byteVectorValues.iterator(); + for (int docV = iterator.nextDoc(); docV != NO_MORE_DOCS; docV = iterator.nextDoc()) { + builder.addVector(byteVectorValues.vectorValue(iterator.index())); + } + + try (var dataset = builder.build()) { + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, dataset); + } } } - try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { - var input = FilterIndexInput.unwrapOnlyTest(in); + } - if (numVectors >= MIN_NUM_VECTORS_FOR_GPU_BUILD) { - if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { - // Direct access to mmapped file - final var dataset = DatasetUtils.getInstance() + private void mergeFloatVectorField( + FieldInfo fieldInfo, + MergeState mergeState, + RandomVectorScorerSupplier randomScorerSupplier, + final int numVectors + ) throws IOException, InterruptedException { + var vectorValues = randomScorerSupplier == null + ? null + : VectorsFormatReflectionUtils.getFloatScoringSupplierVectorOrNull(randomScorerSupplier); + if (vectorValues != null) { + IndexInput slice = vectorValues.getSlice(); + var input = FilterIndexInput.unwrapOnlyTest(slice); + if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { + // Direct access to mmapped file + try ( + var dataset = DatasetUtils.getInstance() .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), dataType); + var resourcesHolder = new ResourcesHolder( + cuVSResourceManager, + cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType) + ) + ) { + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, dataset); + } + } else { + logger.debug( + () -> "Cannot mmap merged raw vectors temporary file. IndexInput type [" + input.getClass().getSimpleName() + "]" + ); - var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType); - try { - generateGpuGraphAndWriteMeta(cuVSResources, fieldInfo, dataset); - } finally { - dataset.close(); - cuVSResourceManager.release(cuVSResources); - } - } else { - logger.debug( - () -> "Cannot mmap merged raw vectors temporary file. IndexInput type [" + input.getClass().getSimpleName() + "]" + try ( + var resourcesHolder = new ResourcesHolder( + cuVSResourceManager, + cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType) + ) + ) { + // Read vector-by-vector + var builder = CuVSMatrix.deviceBuilder( + resourcesHolder.resources(), + numVectors, + fieldInfo.getVectorDimension(), + dataType ); - var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType); - try { - // Read vector-by-vector - var builder = CuVSMatrix.deviceBuilder(cuVSResources, numVectors, fieldInfo.getVectorDimension(), dataType); - - // During merging, we use quantized data, so we need to support byte[] too. - // That's how our current formats work: use floats during indexing, and quantized data to build a graph - // during merging. - if (dataType == CuVSMatrix.DataType.FLOAT) { - float[] vector = new float[fieldInfo.getVectorDimension()]; - for (int i = 0; i < numVectors; ++i) { - input.readFloats(vector, 0, fieldInfo.getVectorDimension()); - builder.addVector(vector); - } - } else { - assert dataType == CuVSMatrix.DataType.BYTE; - byte[] vector = new byte[fieldInfo.getVectorDimension()]; - for (int i = 0; i < numVectors; ++i) { - input.readBytes(vector, 0, fieldInfo.getVectorDimension()); - builder.addVector(vector); - } - } - try (var dataset = builder.build()) { - generateGpuGraphAndWriteMeta(cuVSResources, fieldInfo, dataset); - } - } finally { - cuVSResourceManager.release(cuVSResources); + float[] vector = new float[fieldInfo.getVectorDimension()]; + for (int i = 0; i < numVectors; ++i) { + input.readFloats(vector, 0, fieldInfo.getVectorDimension()); + builder.addVector(vector); + } + + try (var dataset = builder.build()) { + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, dataset); } } - } else { - // we don't really need real value for vectors here, - // we just build a mock graph where every node is connected to every other node - generateMockGraphAndWriteMeta(fieldInfo, numVectors); } - } catch (Throwable t) { - throw new IOException("Failed to merge GPU index: ", t); - } finally { - deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); + } else { + logger.debug("Cannot get merged raw vectors from scorer."); + FloatVectorValues floatVectorValues = MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); + try ( + var resourcesHolder = new ResourcesHolder( + cuVSResourceManager, + cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType) + ) + ) { + // Read vector-by-vector + var builder = CuVSMatrix.deviceBuilder(resourcesHolder.resources(), numVectors, fieldInfo.getVectorDimension(), dataType); + + final KnnVectorValues.DocIndexIterator iterator = floatVectorValues.iterator(); + for (int docV = iterator.nextDoc(); docV != NO_MORE_DOCS; docV = iterator.nextDoc()) { + float[] vector = floatVectorValues.vectorValue(iterator.index()); + builder.addVector(vector); + } + try (var dataset = builder.build()) { + generateGpuGraphAndWriteMeta(resourcesHolder, fieldInfo, dataset); + } + } } - var elapsed = started - System.nanoTime(); - logger.debug("Merged [{}] vectors in [{}ms]", numVectors, elapsed / 1_000_000.0); } private ByteVectorValues getMergedByteVectorValues(FieldInfo fieldInfo, MergeState mergeState) throws IOException { @@ -545,32 +655,6 @@ private ByteVectorValues getMergedByteVectorValues(FieldInfo fieldInfo, MergeSta return MergedQuantizedVectorValues.mergeQuantizedByteVectorValues(fieldInfo, mergeState, quantizer); } - private static int writeByteVectorValues(IndexOutput out, ByteVectorValues vectorValues) throws IOException { - int numVectors = 0; - byte[] vector; - final KnnVectorValues.DocIndexIterator iterator = vectorValues.iterator(); - for (int docV = iterator.nextDoc(); docV != NO_MORE_DOCS; docV = iterator.nextDoc()) { - numVectors++; - vector = vectorValues.vectorValue(iterator.index()); - out.writeBytes(vector, vector.length); - } - return numVectors; - } - - private static int writeFloatVectorValues(FieldInfo fieldInfo, IndexOutput out, FloatVectorValues floatVectorValues) - throws IOException { - int numVectors = 0; - final ByteBuffer buffer = ByteBuffer.allocate(fieldInfo.getVectorDimension() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN); - final KnnVectorValues.DocIndexIterator iterator = floatVectorValues.iterator(); - for (int docV = iterator.nextDoc(); docV != NO_MORE_DOCS; docV = iterator.nextDoc()) { - numVectors++; - float[] vector = floatVectorValues.vectorValue(iterator.index()); - buffer.asFloatBuffer().put(vector); - out.writeBytes(buffer.array(), buffer.array().length); - } - return numVectors; - } - private void writeMeta( FieldInfo field, long vectorIndexOffset, @@ -675,10 +759,6 @@ public void addValue(int docID, float[] vectorValue) throws IOException { lastDocID = docID; } - public DocsWithFieldSet getDocsWithFieldSet() { - return flatFieldVectorsWriter.getDocsWithFieldSet(); - } - @Override public float[] copyValue(float[] vectorValue) { throw new UnsupportedOperationException(); diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ResourcesHolder.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ResourcesHolder.java new file mode 100644 index 0000000000000..91a1bfa1dd781 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ResourcesHolder.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +/** + * Holds an acquired resource, allows to manually release it, but still ensures it gets released (closed) + * at the end of a try-with-resources block via the {@link AutoCloseable} pattern. + */ +class ResourcesHolder implements AutoCloseable { + + private final CuVSResourceManager resourceManager; + private CuVSResourceManager.ManagedCuVSResources managedResource; + + ResourcesHolder(CuVSResourceManager cuVSResourceManager, CuVSResourceManager.ManagedCuVSResources managedResource) { + this.resourceManager = cuVSResourceManager; + this.managedResource = managedResource; + } + + CuVSResourceManager.ManagedCuVSResources resources() { + return managedResource; + } + + void release() { + if (managedResource != null) { + var toRelease = managedResource; + managedResource = null; + resourceManager.release(toRelease); + } + } + + @Override + public void close() { + release(); + } +} From 0819dbd0b2aac757ba5b24d67c01855a935f1f57 Mon Sep 17 00:00:00 2001 From: ldematte Date: Fri, 10 Oct 2025 17:01:03 +0200 Subject: [PATCH 2/6] Fix access --- .../server/cli/SystemJvmOptions.java | 3 +++ .../simdvec/QuantizedByteVectorValuesAccess.java | 16 ++++++++++++++++ .../reflect/VectorsFormatReflectionUtils.java | 12 +++--------- .../org/elasticsearch/xpack/gpu/GPUSupport.java | 2 +- .../xpack/gpu/GPUClientYamlTestSuiteIT.java | 5 ++++- 5 files changed, 27 insertions(+), 11 deletions(-) create mode 100644 libs/simdvec/src/main/java/org/elasticsearch/simdvec/QuantizedByteVectorValuesAccess.java diff --git a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java index d2a7bbb7345d1..40b3679d8fb79 100644 --- a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java +++ b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java @@ -60,6 +60,9 @@ static List systemJvmOptions(Settings nodeSettings, final Map FLAT_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS; private static final Class SCALAR_QUANTIZED_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS; private static final Class FLOAT_SCORING_SUPPLIER_CLASS; - private static final Class BYTE_SCORING_SUPPLIER_CLASS; static { try { FLAT_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS = Class.forName( @@ -56,10 +54,6 @@ public class VectorsFormatReflectionUtils { RandomVectorScorerSupplier.class ); - BYTE_SCORING_SUPPLIER_CLASS = Class.forName("org.elasticsearch.simdvec.internal.Int7SQVectorScorerSupplier"); - lookup = MethodHandles.privateLookupIn(BYTE_SCORING_SUPPLIER_CLASS, MethodHandles.lookup()); - BYTE_VECTORS_HANDLE = lookup.findVarHandle(BYTE_SCORING_SUPPLIER_CLASS, "values", QuantizedByteVectorValues.class); - } catch (IllegalAccessException e) { throw new AssertionError("should not happen, check opens", e); } catch (ReflectiveOperationException e) { @@ -94,8 +88,8 @@ public static HasIndexSlice getFloatScoringSupplierVectorOrNull(RandomVectorScor } public static HasIndexSlice getByteScoringSupplierVectorOrNull(RandomVectorScorerSupplier scorerSupplier) { - if (BYTE_SCORING_SUPPLIER_CLASS.isAssignableFrom(scorerSupplier.getClass())) { - return (QuantizedByteVectorValues) BYTE_VECTORS_HANDLE.get(scorerSupplier); + if (scorerSupplier instanceof QuantizedByteVectorValuesAccess quantizedByteVectorValuesAccess) { + return quantizedByteVectorValuesAccess.get(); } return null; } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java index c21bda894790a..f6e0c58495ef3 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java @@ -59,7 +59,7 @@ public static boolean isSupported(boolean logError) { } } else { if (logError) { - LOG.info("Found compatible GPU [{}] (id: [{}])", gpu.name(), gpu.gpuId()); + LOG.debug("Found compatible GPU [{}] (id: [{}])", gpu.name(), gpu.gpuId()); } return true; } diff --git a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java index c4e7e936b0111..e9211f65bc5c0 100644 --- a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java +++ b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java @@ -29,7 +29,10 @@ private static ElasticsearchCluster createCluster() { .nodes(1) .module("gpu") .setting("xpack.license.self_generated.type", "trial") - .setting("xpack.security.enabled", "false"); + .setting("xpack.security.enabled", "false") + // temporary until we get access to raw vectors in a future Lucene version + .jvmArg("--add-opens=org.apache.lucene.core/org.apache.lucene.codecs.lucene99=org.elasticsearch.server") + .jvmArg("--add-opens=org.apache.lucene.core/org.apache.lucene.internal.vectorization=org.elasticsearch.server"); var libraryPath = System.getenv("LD_LIBRARY_PATH"); if (libraryPath != null) { From 48a3f7c0edfad1cb52ed6acb57973174edcb77b7 Mon Sep 17 00:00:00 2001 From: ldematte Date: Fri, 10 Oct 2025 17:51:43 +0200 Subject: [PATCH 3/6] Expose vector values from Int7SQVectorScorerSupplier --- .../simdvec/internal/Int7SQVectorScorerSupplier.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/Int7SQVectorScorerSupplier.java b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/Int7SQVectorScorerSupplier.java index 19f33ba1c71f7..49f8da4d77df5 100644 --- a/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/Int7SQVectorScorerSupplier.java +++ b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/Int7SQVectorScorerSupplier.java @@ -14,6 +14,7 @@ import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer; import org.apache.lucene.util.quantization.QuantizedByteVectorValues; import org.apache.lucene.util.quantization.ScalarQuantizedVectorSimilarity; +import org.elasticsearch.simdvec.QuantizedByteVectorValuesAccess; import java.io.IOException; import java.lang.foreign.MemorySegment; @@ -23,7 +24,7 @@ import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; import static org.apache.lucene.util.quantization.ScalarQuantizedVectorSimilarity.fromVectorSimilarity; -public abstract sealed class Int7SQVectorScorerSupplier implements RandomVectorScorerSupplier { +public abstract sealed class Int7SQVectorScorerSupplier implements RandomVectorScorerSupplier, QuantizedByteVectorValuesAccess { static final byte BITS = 7; @@ -107,6 +108,11 @@ public void setScoringOrdinal(int node) throws IOException { }; } + @Override + public QuantizedByteVectorValues get() { + return values; + } + public static final class EuclideanSupplier extends Int7SQVectorScorerSupplier { public EuclideanSupplier(MemorySegmentAccessInput input, QuantizedByteVectorValues values, float scoreCorrectionConstant) { From 1fc9ff1120d7d94b289419329c40ff8caf572584 Mon Sep 17 00:00:00 2001 From: Lorenzo Dematte Date: Mon, 13 Oct 2025 08:32:41 +0200 Subject: [PATCH 4/6] PR feedback --- .../codec/vectors/reflect/VectorsFormatReflectionUtils.java | 3 ++- .../xpack/gpu/codec/ES92GpuHnswVectorsWriter.java | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java index 8382475bad327..7c6f93740f899 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java @@ -13,6 +13,7 @@ import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; +import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter; import org.elasticsearch.simdvec.QuantizedByteVectorValuesAccess; import java.lang.invoke.MethodHandles; @@ -45,7 +46,7 @@ public class VectorsFormatReflectionUtils { FLOAT_VECTORS_HANDLE = lookup.findVarHandle(FLOAT_SCORING_SUPPLIER_CLASS, "values", FloatVectorValues.class); SCALAR_QUANTIZED_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS = Class.forName( - "org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter$ScalarQuantizedCloseableRandomVectorScorerSupplier" + Lucene99ScalarQuantizedVectorsWriter.class.getCanonicalName() + "$ScalarQuantizedCloseableRandomVectorScorerSupplier" ); lookup = MethodHandles.privateLookupIn(SCALAR_QUANTIZED_CLOSEABLE_RANDOM_VECTOR_SCORER_SUPPLIER_CLASS, MethodHandles.lookup()); BYTE_SUPPLIER_HANDLE = lookup.findVarHandle( diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java index f415485094faa..52cae8d9473ac 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java @@ -542,7 +542,7 @@ private void mergeByteVectorField( } } } else { - logger.debug("Cannot get merged raw vectors from scorer."); + logger.warn("Cannot get merged raw vectors from scorer."); var byteVectorValues = getMergedByteVectorValues(fieldInfo, mergeState); try ( var resourcesHolder = new ResourcesHolder( @@ -624,7 +624,7 @@ private void mergeFloatVectorField( } } } else { - logger.debug("Cannot get merged raw vectors from scorer."); + logger.warn("Cannot get merged raw vectors from scorer."); FloatVectorValues floatVectorValues = MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); try ( var resourcesHolder = new ResourcesHolder( From a9045982ddf3d43dec191217447a6e117cc8f9e0 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 13 Oct 2025 06:39:59 +0000 Subject: [PATCH 5/6] [CI] Auto commit changes from spotless --- .../codec/vectors/reflect/VectorsFormatReflectionUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java index 7c6f93740f899..cae4dc12e28cf 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java @@ -10,10 +10,10 @@ package org.elasticsearch.index.codec.vectors.reflect; import org.apache.lucene.codecs.lucene95.HasIndexSlice; +import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; -import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter; import org.elasticsearch.simdvec.QuantizedByteVectorValuesAccess; import java.lang.invoke.MethodHandles; From de1f42a27a1596dca973bb46cf991172c95a6a2c Mon Sep 17 00:00:00 2001 From: Lorenzo Dematte Date: Mon, 13 Oct 2025 18:11:48 +0200 Subject: [PATCH 6/6] Fix --- .../elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java index 3c5ef544541c7..a9ec6d6cdf2bd 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java @@ -206,8 +206,6 @@ private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IO try (var dataset = builder.build()) { flushFieldWithGpuGraph(resourcesHolder, fieldInfo, dataset, sortMap); } - } finally { - cuVSResourceManager.release(cuVSResources); } } var elapsed = started - System.nanoTime();