From 3af34ab487a9639b1d6d72c6c7d3f4b2e13f0def Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 17 Jun 2025 13:26:47 -0400 Subject: [PATCH 001/109] Iter1 - Created GPUVectorsFormat that write/read of flat vectors - Added a new index_options: gpu for dense_vector field that is under the feature flag --- server/src/main/java/module-info.java | 3 +- .../index/codec/vectors/GPUVectorsFormat.java | 74 ++++++ .../index/codec/vectors/GPUVectorsReader.java | 220 ++++++++++++++++++ .../index/codec/vectors/GPUVectorsWriter.java | 170 ++++++++++++++ .../vectors/DenseVectorFieldMapper.java | 56 +++++ .../org.apache.lucene.codecs.KnnVectorsFormat | 1 + .../codec/vectors/GPUVectorsFormatTests.java | 76 ++++++ .../vectors/DenseVectorFieldMapperTests.java | 61 +++++ 8 files changed, 660 insertions(+), 1 deletion(-) create mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormat.java create mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsReader.java create mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsWriter.java create mode 100644 server/src/test/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormatTests.java diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 7b2b6d91fdc83..6b088676e2b5c 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -457,7 +457,8 @@ org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat, - org.elasticsearch.index.codec.vectors.IVFVectorsFormat; + org.elasticsearch.index.codec.vectors.IVFVectorsFormat, + org.elasticsearch.index.codec.vectors.GPUVectorsFormat; provides org.apache.lucene.codecs.Codec with diff --git 
a/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormat.java new file mode 100644 index 0000000000000..17343f7a934dc --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormat.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; +import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + +import java.io.IOException; + +/** + * Codec format for GPU-accelerated vector indexes. This format is designed to + * leverage GPU processing capabilities for vector search operations. 
+ */ +public class GPUVectorsFormat extends KnnVectorsFormat { + + public static final String NAME = "GPUVectorsFormat"; + public static final String GPU_IDX_EXTENSION = "gpuidx"; + public static final String GPU_META_EXTENSION = "mgpu"; + + public static final int VERSION_START = 0; + public static final int VERSION_CURRENT = VERSION_START; + + private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat( + FlatVectorScorerUtil.getLucene99FlatVectorsScorer() + ); + + public GPUVectorsFormat() { + super(NAME); + } + + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return new GPUVectorsWriter(state, rawVectorFormat.fieldsWriter(state)); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return new GPUVectorsReader(state, rawVectorFormat.fieldsReader(state)); + } + + @Override + public int getMaxDimensions(String fieldName) { + return 4096; + } + + @Override + public String toString() { + return NAME + "()"; + } + + static GPUVectorsReader getGPUReader(KnnVectorsReader vectorsReader, String fieldName) { + if (vectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader candidateReader) { + vectorsReader = candidateReader.getFieldReader(fieldName); + } + if (vectorsReader instanceof GPUVectorsReader reader) { + return reader; + } + return null; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsReader.java new file mode 100644 index 0000000000000..db40e07f53ef7 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsReader.java @@ -0,0 +1,220 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.hnsw.FlatVectorsReader; +import org.apache.lucene.index.ByteVectorValues; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.internal.hppc.IntObjectHashMap; +import org.apache.lucene.search.KnnCollector; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; +import org.apache.lucene.util.hnsw.RandomVectorScorer; +import org.elasticsearch.core.IOUtils; + +import java.io.IOException; + +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; + +/** + * Reader for GPU-accelerated vectors. This reader is used to read the GPU vectors from the index. 
+ */ +public class GPUVectorsReader extends KnnVectorsReader { + + private final IndexInput gpuIdx; + private final SegmentReadState state; + private final FieldInfos fieldInfos; + protected final IntObjectHashMap fields; + private final FlatVectorsReader rawVectorsReader; + + @SuppressWarnings("this-escape") + public GPUVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException { + this.state = state; + this.fieldInfos = state.fieldInfos; + this.rawVectorsReader = rawVectorsReader; + this.fields = new IntObjectHashMap<>(); + String meta = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, GPUVectorsFormat.GPU_META_EXTENSION); + + int versionMeta = -1; + boolean success = false; + try (ChecksumIndexInput gpuMeta = state.directory.openChecksumInput(meta)) { + Throwable priorE = null; + try { + versionMeta = CodecUtil.checkIndexHeader( + gpuMeta, + GPUVectorsFormat.NAME, + GPUVectorsFormat.VERSION_START, + GPUVectorsFormat.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + readFields(gpuMeta); + } catch (Throwable exception) { + priorE = exception; + } finally { + CodecUtil.checkFooter(gpuMeta, priorE); + } + gpuIdx = openDataInput(state, versionMeta, GPUVectorsFormat.GPU_IDX_EXTENSION, GPUVectorsFormat.NAME, state.context); + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(this); + } + } + } + + private static IndexInput openDataInput( + SegmentReadState state, + int versionMeta, + String fileExtension, + String codecName, + IOContext context + ) throws IOException { + final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension); + final IndexInput in = state.directory.openInput(fileName, context); + boolean success = false; + try { + final int versionVectorData = CodecUtil.checkIndexHeader( + in, + codecName, + GPUVectorsFormat.VERSION_START, + GPUVectorsFormat.VERSION_CURRENT, + 
state.segmentInfo.getId(), + state.segmentSuffix + ); + if (versionMeta != versionVectorData) { + throw new CorruptIndexException( + "Format versions mismatch: meta=" + versionMeta + ", " + codecName + "=" + versionVectorData, + in + ); + } + CodecUtil.retrieveChecksum(in); + success = true; + return in; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(in); + } + } + } + + private void readFields(ChecksumIndexInput meta) throws IOException { + for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { + final FieldInfo info = fieldInfos.fieldInfo(fieldNumber); + if (info == null) { + throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta); + } + fields.put(info.number, readField(meta, info)); + } + } + + private FieldEntry readField(IndexInput input, FieldInfo info) throws IOException { + final VectorEncoding vectorEncoding = readVectorEncoding(input); + final VectorSimilarityFunction similarityFunction = readSimilarityFunction(input); + final long dataOffset = input.readLong(); + final long dataLength = input.readLong(); + + if (similarityFunction != info.getVectorSimilarityFunction()) { + throw new IllegalStateException( + "Inconsistent vector similarity function for field=\"" + + info.name + + "\"; " + + similarityFunction + + " != " + + info.getVectorSimilarityFunction() + ); + } + return new FieldEntry(similarityFunction, vectorEncoding, dataOffset, dataLength); + } + + private static VectorSimilarityFunction readSimilarityFunction(DataInput input) throws IOException { + final int i = input.readInt(); + if (i < 0 || i >= SIMILARITY_FUNCTIONS.size()) { + throw new IllegalArgumentException("invalid distance function: " + i); + } + return SIMILARITY_FUNCTIONS.get(i); + } + + private static VectorEncoding readVectorEncoding(DataInput input) throws IOException { + final int encodingId = input.readInt(); + if (encodingId < 0 || encodingId >= VectorEncoding.values().length) { + throw 
new CorruptIndexException("Invalid vector encoding id: " + encodingId, input); + } + return VectorEncoding.values()[encodingId]; + } + + @Override + public final void checkIntegrity() throws IOException { + rawVectorsReader.checkIntegrity(); + CodecUtil.checksumEntireFile(gpuIdx); + } + + @Override + public final FloatVectorValues getFloatVectorValues(String field) throws IOException { + return rawVectorsReader.getFloatVectorValues(field); + } + + @Override + public final ByteVectorValues getByteVectorValues(String field) throws IOException { + return rawVectorsReader.getByteVectorValues(field); + } + + @Override + public final void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + // TODO: Implement GPU-accelerated search + collectAllMatchingDocs(knnCollector, acceptDocs, rawVectorsReader.getRandomVectorScorer(field, target)); + } + + private void collectAllMatchingDocs(KnnCollector knnCollector, Bits acceptDocs, RandomVectorScorer scorer) throws IOException { + OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc); + Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs); + for (int i = 0; i < scorer.maxOrd(); i++) { + if (acceptedOrds == null || acceptedOrds.get(i)) { + collector.collect(i, scorer.score(i)); + collector.incVisitedCount(1); + } + } + assert collector.earlyTerminated() == false; + } + + @Override + public final void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + collectAllMatchingDocs(knnCollector, acceptDocs, rawVectorsReader.getRandomVectorScorer(field, target)); + } + + @Override + public void close() throws IOException { + IOUtils.close(rawVectorsReader, gpuIdx); + } + + protected record FieldEntry( + VectorSimilarityFunction similarityFunction, + VectorEncoding vectorEncoding, + long dataOffset, + long dataLength + ) { + IndexInput dataSlice(IndexInput dataFile) throws IOException { 
+ return dataFile.slice("gpu-data", dataOffset, dataLength); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsWriter.java new file mode 100644 index 0000000000000..0245105a712b1 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsWriter.java @@ -0,0 +1,170 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.KnnFieldVectorsWriter; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.Sorter; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.IndexOutput; +import org.elasticsearch.core.IOUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; + +/** + * Writer for GPU-accelerated vectors. 
+ */ +public class GPUVectorsWriter extends KnnVectorsWriter { + + private final List fieldWriters = new ArrayList<>(); + private final IndexOutput gpuIdx; + private final IndexOutput gpuMeta; + private final FlatVectorsWriter rawVectorDelegate; + private final SegmentWriteState segmentWriteState; + + @SuppressWarnings("this-escape") + public GPUVectorsWriter(SegmentWriteState state, FlatVectorsWriter rawVectorDelegate) throws IOException { + this.segmentWriteState = state; + this.rawVectorDelegate = rawVectorDelegate; + final String metaFileName = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + GPUVectorsFormat.GPU_META_EXTENSION + ); + + final String gpuIdxFileName = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + GPUVectorsFormat.GPU_IDX_EXTENSION + ); + boolean success = false; + try { + gpuMeta = state.directory.createOutput(metaFileName, state.context); + CodecUtil.writeIndexHeader( + gpuMeta, + GPUVectorsFormat.NAME, + GPUVectorsFormat.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + gpuIdx = state.directory.createOutput(gpuIdxFileName, state.context); + CodecUtil.writeIndexHeader( + gpuIdx, + GPUVectorsFormat.NAME, + GPUVectorsFormat.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(this); + } + } + } + + @Override + public final KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException { + final FlatFieldVectorsWriter rawVectorDelegate = this.rawVectorDelegate.addField(fieldInfo); + if (fieldInfo.getVectorEncoding().equals(VectorEncoding.FLOAT32)) { + @SuppressWarnings("unchecked") + final FlatFieldVectorsWriter floatWriter = (FlatFieldVectorsWriter) rawVectorDelegate; + fieldWriters.add(new FieldWriter(fieldInfo, floatWriter)); + } + return rawVectorDelegate; + } + + @Override + public final void flush(int maxDoc, Sorter.DocMap 
sortMap) throws IOException { + rawVectorDelegate.flush(maxDoc, sortMap); + for (FieldWriter fieldWriter : fieldWriters) { + // TODO: Implement GPU-specific vector merging instead of bogus implementation + long dataOffset = gpuIdx.alignFilePointer(Float.BYTES); + var vectors = fieldWriter.delegate.getVectors(); + for (int i = 0; i < vectors.size(); i++) { + gpuIdx.writeVInt(0); + } + long dataLength = gpuIdx.getFilePointer() - dataOffset; + writeMeta(fieldWriter.fieldInfo, dataOffset, dataLength); + } + } + + @Override + public final void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { + if (fieldInfo.getVectorEncoding().equals(VectorEncoding.FLOAT32)) { + rawVectorDelegate.mergeOneField(fieldInfo, mergeState); + // TODO: Implement GPU-specific vector merging instead of bogus implementation + FloatVectorValues floatVectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); + long dataOffset = gpuIdx.alignFilePointer(Float.BYTES); + for (int i = 0; i < floatVectorValues.size(); i++) { + gpuIdx.writeVInt(0); + } + long dataLength = gpuIdx.getFilePointer() - dataOffset; + writeMeta(fieldInfo, dataOffset, dataLength); + } else { + rawVectorDelegate.mergeOneField(fieldInfo, mergeState); + } + } + + private void writeMeta(FieldInfo field, long dataOffset, long dataLength) throws IOException { + gpuMeta.writeInt(field.number); + gpuMeta.writeInt(field.getVectorEncoding().ordinal()); + gpuMeta.writeInt(distFuncToOrd(field.getVectorSimilarityFunction())); + gpuMeta.writeLong(dataOffset); + gpuMeta.writeLong(dataLength); + } + + private static int distFuncToOrd(VectorSimilarityFunction func) { + for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) { + if (SIMILARITY_FUNCTIONS.get(i).equals(func)) { + return (byte) i; + } + } + throw new IllegalArgumentException("invalid distance function: " + func); + } + + @Override + public final void finish() throws IOException { + rawVectorDelegate.finish(); + if 
(gpuMeta != null) { + // write end of fields marker + gpuMeta.writeInt(-1); + CodecUtil.writeFooter(gpuMeta); + } + if (gpuIdx != null) { + CodecUtil.writeFooter(gpuIdx); + } + } + + @Override + public final void close() throws IOException { + IOUtils.close(rawVectorDelegate, gpuMeta, gpuIdx); + } + + @Override + public final long ramBytesUsed() { + return rawVectorDelegate.ramBytesUsed(); + } + + private record FieldWriter(FieldInfo fieldInfo, FlatFieldVectorsWriter delegate) {} +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index a0e8ceff02248..48f125130bb1d 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -49,6 +49,7 @@ import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat; import org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat; +import org.elasticsearch.index.codec.vectors.GPUVectorsFormat; import org.elasticsearch.index.codec.vectors.IVFVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat; @@ -122,6 +123,7 @@ public class DenseVectorFieldMapper extends FieldMapper { public static final int BBQ_MIN_DIMS = 64; public static final FeatureFlag IVF_FORMAT = new FeatureFlag("ivf_format"); + public static final FeatureFlag GPU_FORMAT = new FeatureFlag("gpu_format"); public static boolean isNotUnitVector(float magnitude) { return Math.abs(magnitude - 1.0f) > EPS; @@ -1648,6 +1650,23 @@ public boolean supportsElementType(ElementType elementType) { return elementType == ElementType.FLOAT; } + @Override + public boolean 
supportsDimension(int dims) { + return true; + } + }, + GPU("gpu", false) { + @Override + public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { + MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); + return new GPUIndexOptions(); + } + + @Override + public boolean supportsElementType(ElementType elementType) { + return elementType == ElementType.FLOAT; + } + @Override public boolean supportsDimension(int dims) { return true; @@ -1657,6 +1676,7 @@ public boolean supportsDimension(int dims) { static Optional fromString(String type) { return Stream.of(VectorIndexType.values()) .filter(vectorIndexType -> vectorIndexType != VectorIndexType.BBQ_IVF || IVF_FORMAT.isEnabled()) + .filter(vectorIndexType -> vectorIndexType != VectorIndexType.GPU || GPU_FORMAT.isEnabled()) .filter(vectorIndexType -> vectorIndexType.name.equals(type)) .findFirst(); } @@ -1774,6 +1794,42 @@ public int doHashCode() { } } + static class GPUIndexOptions extends IndexOptions { + + GPUIndexOptions() { + super(VectorIndexType.GPU); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("type", type); + builder.endObject(); + return builder; + } + + @Override + KnnVectorsFormat getVectorsFormat(ElementType elementType) { + assert elementType == ElementType.FLOAT; + return new GPUVectorsFormat(); + } + + @Override + boolean updatableTo(IndexOptions update) { + return false; + } + + @Override + public boolean doEquals(IndexOptions o) { + return o instanceof GPUIndexOptions; + } + + @Override + public int doHashCode() { + return Objects.hash(type); + } + } + static class Int4HnswIndexOptions extends QuantizedIndexOptions { private final int m; private final int efConstruction; diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat 
b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat index 14e68029abc3b..97a95478c5fea 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -8,3 +8,4 @@ org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsForma org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.IVFVectorsFormat +org.elasticsearch.index.codec.vectors.GPUVectorsFormat diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormatTests.java new file mode 100644 index 0000000000000..7f7c5623740e9 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormatTests.java @@ -0,0 +1,76 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ +package org.elasticsearch.index.codec.vectors; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.elasticsearch.common.logging.LogConfigurator; +import org.junit.Before; + +public class GPUVectorsFormatTests extends BaseKnnVectorsFormatTestCase { + + static { + LogConfigurator.loadLog4jPlugins(); + LogConfigurator.configureESLogging(); // native access requires logging to be initialized + } + KnnVectorsFormat format; + + @Before + @Override + public void setUp() throws Exception { + format = new GPUVectorsFormat(); + super.setUp(); + } + + @Override + protected VectorSimilarityFunction randomSimilarity() { + return VectorSimilarityFunction.values()[random().nextInt(VectorSimilarityFunction.values().length)]; + } + + @Override + protected VectorEncoding randomVectorEncoding() { + return VectorEncoding.FLOAT32; + } + + @Override + public void testSearchWithVisitedLimit() { + // TODO + } + + @Override + protected Codec getCodec() { + return TestUtil.alwaysKnnVectorsFormat(format); + } + + @Override + public void testAdvance() throws Exception { + // TODO + } + + public void testToString() { + FilterCodec customCodec = new FilterCodec("foo", Codec.getDefault()) { + @Override + public KnnVectorsFormat knnVectorsFormat() { + return new GPUVectorsFormat(); + } + }; + String expectedPattern = "GPUVectorsFormat()"; + assertEquals(expectedPattern, customCodec.knnVectorsFormat().toString()); + } + + @Override + public void testSortedIndexBytes() throws Exception { + super.testSortedIndexBytes(); + } +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java 
b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 149c908fd380b..ef52a3a099879 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -66,6 +66,7 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_NPROBE; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.GPU_FORMAT; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -1324,6 +1325,66 @@ protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneD @Override public void testAggregatableConsistency() {} + public void testGPUParsing() throws IOException { + assumeTrue("feature flag [gpu_format] must be enabled", GPU_FORMAT.isEnabled()); + DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> { + b.field("type", "dense_vector"); + b.field("dims", 128); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("type", "gpu"); + b.endObject(); + })); + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field"); + DenseVectorFieldMapper.GPUIndexOptions indexOptions = (DenseVectorFieldMapper.GPUIndexOptions) denseVectorFieldMapper.fieldType() + .getIndexOptions(); + // TODO: finish tests + } + + public void testGPUParsingFailureInRelease() { + assumeFalse("feature flag [gpu_format] must be disabled", GPU_FORMAT.isEnabled()); + + Exception e = expectThrows( + MapperParsingException.class, + () -> 
createDocumentMapper( + fieldMapping( + b -> b.field("type", "dense_vector").field("dims", dims).startObject("index_options").field("type", "gpu").endObject() + ) + ) + ); + assertThat(e.getMessage(), containsString("Unknown vector index options")); + } + + public void testKnnGPUVectorsFormat() throws IOException { + assumeTrue("feature flag [gpu_format] must be enabled", GPU_FORMAT.isEnabled()); + final int dims = randomIntBetween(64, 4096); + MapperService mapperService = createMapperService(fieldMapping(b -> { + b.field("type", "dense_vector"); + b.field("dims", dims); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("type", "gpu"); + b.endObject(); + })); + CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); + Codec codec = codecService.codec("default"); + KnnVectorsFormat knnVectorsFormat; + if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { + assertThat(codec, instanceOf(PerFieldMapperCodec.class)); + knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); + } else { + if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { + codec = deduplicateFieldInfosCodec.delegate(); + } + assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); + knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); + } + String expectedString = "GPUVectorsFormat()"; + assertEquals(expectedString, knnVectorsFormat.toString()); + } + public void testIVFParsing() throws IOException { assumeTrue("feature flag [ivf_format] must be enabled", IVF_FORMAT.isEnabled()); { From d0da0d4dc9794e0a9ac8bfba64fd27673be6b497 Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Thu, 19 Jun 2025 15:04:37 +0100 Subject: [PATCH 002/109] move GPU plugin to x-pack --- .../search/query/RescoreKnnVectorQueryIT.java | 6 +- server/src/main/java/module-info.java | 3 +- 
.../index/mapper/DocumentParser.java | 3 +- .../index/mapper/DocumentParserContext.java | 5 + .../elasticsearch/index/mapper/Mapper.java | 12 + .../index/mapper/MapperRegistry.java | 13 +- .../index/mapper/MapperService.java | 1 + .../index/mapper/MappingParserContext.java | 10 + .../vectors/DenseVectorFieldMapper.java | 269 ++-- .../elasticsearch/indices/IndicesModule.java | 28 +- .../elasticsearch/plugins/MapperPlugin.java | 13 + .../org.apache.lucene.codecs.KnnVectorsFormat | 1 - .../elasticsearch/ReleaseVersionsTests.java | 12 + .../metadata/IndexMetadataVerifierTests.java | 3 +- .../index/IndexSettingsTests.java | 3 +- .../elasticsearch/index/codec/CodecTests.java | 3 +- .../index/mapper/MappingParserTests.java | 1 + .../index/mapper/ParametrizedMapperTests.java | 1 + .../index/mapper/TypeParsersTests.java | 1 + .../vectors/DenseVectorFieldMapperTests.java | 1321 +-------------- .../query/SearchExecutionContextTests.java | 1 + ...bstractDenseVectorFieldMapperTestcase.java | 1432 +++++++++++++++++ .../mapper/TestDocumentParserContext.java | 1 + .../aggregations/AggregatorTestCase.java | 3 +- x-pack/plugin/gpu/build.gradle | 21 + .../plugin/gpu/src/main/java/module-info.java | 16 + .../elasticsearch/xpack/gpu/GPUPlugin.java | 107 ++ .../xpack/gpu/codec}/GPUVectorsFormat.java | 10 +- .../xpack/gpu/codec}/GPUVectorsReader.java | 10 +- .../xpack/gpu/codec}/GPUVectorsWriter.java | 10 +- .../org.apache.lucene.codecs.KnnVectorsFormat | 2 + .../codec/GPUDenseVectorFieldMapperTests.java | 103 ++ .../gpu/codec}/GPUVectorsFormatTests.java | 17 +- 33 files changed, 1939 insertions(+), 1503 deletions(-) create mode 100644 test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java create mode 100644 x-pack/plugin/gpu/build.gradle create mode 100644 x-pack/plugin/gpu/src/main/java/module-info.java create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java rename 
{server/src/main/java/org/elasticsearch/index/codec/vectors => x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec}/GPUVectorsFormat.java (84%) rename {server/src/main/java/org/elasticsearch/index/codec/vectors => x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec}/GPUVectorsReader.java (95%) rename {server/src/main/java/org/elasticsearch/index/codec/vectors => x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec}/GPUVectorsWriter.java (93%) create mode 100644 x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat create mode 100644 x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java rename {server/src/test/java/org/elasticsearch/index/codec/vectors => x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec}/GPUVectorsFormatTests.java (87%) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/RescoreKnnVectorQueryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/RescoreKnnVectorQueryIT.java index c8812cfc109f2..0332e9b8aebab 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/query/RescoreKnnVectorQueryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/RescoreKnnVectorQueryIT.java @@ -17,7 +17,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; -import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.VectorIndexType; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.BasicVectorIndexType; import org.elasticsearch.index.mapper.vectors.DenseVectorScriptDocValues; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; @@ -84,8 +84,8 @@ protected Map, Object>> pluginScripts() { @Before public void setup() throws 
IOException { String type = randomFrom( - Arrays.stream(VectorIndexType.values()) - .filter(VectorIndexType::isQuantized) + Arrays.stream(BasicVectorIndexType.values()) + .filter(BasicVectorIndexType::isQuantized) .map(t -> t.name().toLowerCase(Locale.ROOT)) .collect(Collectors.toCollection(ArrayList::new)) ); diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 6b088676e2b5c..7b2b6d91fdc83 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -457,8 +457,7 @@ org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat, - org.elasticsearch.index.codec.vectors.IVFVectorsFormat, - org.elasticsearch.index.codec.vectors.GPUVectorsFormat; + org.elasticsearch.index.codec.vectors.IVFVectorsFormat; provides org.apache.lucene.codecs.Codec with diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index f877502fe8754..1f7f1c1518f02 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -804,7 +804,8 @@ private static void postProcessDynamicArrayMapping(DocumentParserContext context DenseVectorFieldMapper.Builder builder = new DenseVectorFieldMapper.Builder( fieldName, - context.indexSettings().getIndexVersionCreated() + context.indexSettings().getIndexVersionCreated(), + context::denseVectorIndexType ); DenseVectorFieldMapper denseVectorFieldMapper = builder.build(builderContext); context.updateDynamicMappers(fullFieldName, List.of(denseVectorFieldMapper)); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java 
b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index b77c0426c23d4..7ff8d31338ed7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -19,6 +19,7 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.mapper.MapperService.MergeReason; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.xcontent.FilterXContentParserWrapper; import org.elasticsearch.xcontent.FlatteningXContentParser; import org.elasticsearch.xcontent.XContentBuilder; @@ -287,6 +288,10 @@ public final RootObjectMapper root() { return this.mappingLookup.getMapping().getRoot(); } + public final DenseVectorFieldMapper.VectorIndexType denseVectorIndexType(String type) { + return mappingParserContext.denseVectorIndexType(type); + } + public final ObjectMapper parent() { return parent; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java b/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java index cf3261d88bf10..a2d2b821e209d 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java @@ -131,6 +131,18 @@ default boolean supportsVersion(IndexVersion indexCreatedVersion) { } } + // public interface DenseVectorTypeParser { + // DenseVectorFieldMapper parse(String name, Map node, MappingParserContext parserContext) + // throws MapperParsingException; + // + // /** + // * Whether we can parse this type on indices with the given index created version. 
+ // */ + // default boolean supportsVersion(IndexVersion indexCreatedVersion) { + // return indexCreatedVersion.onOrAfter(IndexVersions.MINIMUM_READONLY_COMPATIBLE); + // } + // } + private final String leafName; @SuppressWarnings("this-escape") diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java index 44f7def74ec0e..8c8db12b23745 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java @@ -11,6 +11,7 @@ import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.plugins.FieldPredicate; import org.elasticsearch.plugins.MapperPlugin; @@ -31,12 +32,14 @@ public final class MapperRegistry { private final Map metadataMapperParsers6x; private final Map metadataMapperParsers5x; private final Function fieldFilter; + private final Map denseVectorIndexTypes; public MapperRegistry( Map mapperParsers, Map runtimeFieldParsers, Map metadataMapperParsers, - Function fieldFilter + Function fieldFilter, + Map denseVectorIndexTypes ) { this.mapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(mapperParsers)); this.runtimeFieldParsers = runtimeFieldParsers; @@ -50,6 +53,7 @@ public MapperRegistry( metadata5x.put(LegacyTypeFieldMapper.NAME, LegacyTypeFieldMapper.PARSER); this.metadataMapperParsers5x = metadata5x; this.fieldFilter = fieldFilter; + this.denseVectorIndexTypes = Collections.unmodifiableMap(denseVectorIndexTypes); } /** @@ -100,4 +104,11 @@ public Map getMetadataMapperParsers(Inde public Function getFieldFilter() { return fieldFilter; } + + /** + * Returns ... 
+ */ + public DenseVectorFieldMapper.VectorIndexType getDenseVectorIndexType(String type) { + return denseVectorIndexTypes.get(type); + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java index 7958fd8e51525..aa0f22b442973 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -238,6 +238,7 @@ public MapperService( similarityService::getSimilarity, type -> mapperRegistry.getMapperParser(type, indexVersionCreated), mapperRegistry.getRuntimeFieldParsers()::get, + mapperRegistry::getDenseVectorIndexType, indexVersionCreated, clusterTransportVersion, searchExecutionContextSupplier, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java index f74a257f32921..4605712a3903f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.script.ScriptCompiler; @@ -33,6 +34,7 @@ public class MappingParserContext { private final Function similarityLookupService; private final Function typeParsers; private final Function runtimeFieldParsers; + private final Function denseVectorIndexType; private final IndexVersion indexVersionCreated; private final Supplier clusterTransportVersion; private final Supplier searchExecutionContextSupplier; @@ -48,6 +50,7 @@ public 
MappingParserContext( Function similarityLookupService, Function typeParsers, Function runtimeFieldParsers, + Function denseVectorIndexType, IndexVersion indexVersionCreated, Supplier clusterTransportVersion, Supplier searchExecutionContextSupplier, @@ -60,6 +63,7 @@ public MappingParserContext( this.similarityLookupService = similarityLookupService; this.typeParsers = typeParsers; this.runtimeFieldParsers = runtimeFieldParsers; + this.denseVectorIndexType = denseVectorIndexType; this.indexVersionCreated = indexVersionCreated; this.clusterTransportVersion = clusterTransportVersion; this.searchExecutionContextSupplier = searchExecutionContextSupplier; @@ -99,6 +103,10 @@ public RuntimeField.Parser runtimeFieldParser(String type) { return runtimeFieldParsers.apply(type); } + public DenseVectorFieldMapper.VectorIndexType denseVectorIndexType(String type) { + return denseVectorIndexType.apply(type); + } + public IndexVersion indexVersionCreated() { return indexVersionCreated; } @@ -163,6 +171,7 @@ private static class MultiFieldParserContext extends MappingParserContext { in.similarityLookupService, in.typeParsers, in.runtimeFieldParsers, + null, // TODO: serialization in.indexVersionCreated, in.clusterTransportVersion, in.searchExecutionContextSupplier, @@ -193,6 +202,7 @@ private static class DynamicTemplateParserContext extends MappingParserContext { in.similarityLookupService, in.typeParsers, in.runtimeFieldParsers, + null, // TODO: serialization in.indexVersionCreated, in.clusterTransportVersion, in.searchExecutionContextSupplier, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 48f125130bb1d..33670e555716b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -49,7 +49,6 @@ 
import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat; import org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat; -import org.elasticsearch.index.codec.vectors.GPUVectorsFormat; import org.elasticsearch.index.codec.vectors.IVFVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat; @@ -106,6 +105,7 @@ import java.util.Set; import java.util.function.Function; import java.util.function.Supplier; +import java.util.stream.Collectors; import java.util.stream.Stream; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_INDEX_VERSION_CREATED; @@ -123,7 +123,6 @@ public class DenseVectorFieldMapper extends FieldMapper { public static final int BBQ_MIN_DIMS = 64; public static final FeatureFlag IVF_FORMAT = new FeatureFlag("ivf_format"); - public static final FeatureFlag GPU_FORMAT = new FeatureFlag("gpu_format"); public static boolean isNotUnitVector(float magnitude) { return Math.abs(magnitude - 1.0f) > EPS; @@ -174,6 +173,8 @@ public KnnSearchStrategy getKnnSearchStrategy() { Setting.Property.Dynamic ); + // public static final SetOnce> DENSE_VECTOR_INDEX_TYPES = new SetOnce<>(); + private static boolean hasRescoreIndexVersion(IndexVersion version) { return version.onOrAfter(IndexVersions.ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS) || version.between(IndexVersions.ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS_BACKPORT_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0); @@ -266,9 +267,16 @@ public static class Builder extends FieldMapper.Builder { final IndexVersion indexVersionCreated; - public Builder(String name, IndexVersion indexVersionCreated) { + final Function denseVectorIndexType; + + public Builder( + String name, + IndexVersion indexVersionCreated, + Function denseVectorIndexType + ) { super(name); 
this.indexVersionCreated = indexVersionCreated; + this.denseVectorIndexType = denseVectorIndexType; final boolean indexedByDefault = indexVersionCreated.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION); final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW); this.indexed = Parameter.indexParam(m -> toType(m).fieldType().indexed, indexedByDefault); @@ -308,7 +316,7 @@ public Builder(String name, IndexVersion indexVersionCreated) { null ) : null, - (n, c, o) -> o == null ? null : parseIndexOptions(n, o, indexVersionCreated), + (n, c, o) -> o == null ? null : parseIndexOptions(n, o, indexVersionCreated, denseVectorIndexType), m -> toType(m).indexOptions, (b, n, v) -> { if (v != null) { @@ -399,7 +407,8 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { ), builderParams(this, context), indexOptions.getValue(), - indexVersionCreated + indexVersionCreated, + denseVectorIndexType ); } } @@ -1312,13 +1321,17 @@ public final String toString() { } public abstract static class IndexOptions implements ToXContent { - final VectorIndexType type; + protected final VectorIndexType type; - IndexOptions(VectorIndexType type) { + protected IndexOptions(VectorIndexType type) { this.type = type; } - abstract KnnVectorsFormat getVectorsFormat(ElementType elementType); + public VectorIndexType type() { + return type; + } + + public abstract KnnVectorsFormat getVectorsFormat(ElementType elementType); public boolean validate(ElementType elementType, int dim, boolean throwOnError) { return validateElementType(elementType, throwOnError) && validateDimension(dim, throwOnError); @@ -1338,7 +1351,7 @@ final boolean validateElementType(ElementType elementType, boolean throwOnError) return validElementType; } - abstract boolean updatableTo(IndexOptions update); + public abstract boolean updatableTo(IndexOptions update); public boolean validateDimension(int dim) { return validateDimension(dim, true); @@ -1347,14 +1360,14 @@ 
public boolean validateDimension(int dim) { public boolean validateDimension(int dim, boolean throwOnError) { boolean supportsDimension = type.supportsDimension(dim); if (throwOnError && supportsDimension == false) { - throw new IllegalArgumentException(type.name + " only supports even dimensions; provided=" + dim); + throw new IllegalArgumentException(type.name() + " only supports even dimensions; provided=" + dim); } return supportsDimension; } - abstract boolean doEquals(IndexOptions other); + protected abstract boolean doEquals(IndexOptions other); - abstract int doHashCode(); + protected abstract int doHashCode(); @Override public final boolean equals(Object other) { @@ -1377,13 +1390,32 @@ public final int hashCode() { abstract static class QuantizedIndexOptions extends IndexOptions { final RescoreVector rescoreVector; - QuantizedIndexOptions(VectorIndexType type, RescoreVector rescoreVector) { + QuantizedIndexOptions(BasicVectorIndexType type, RescoreVector rescoreVector) { super(type); this.rescoreVector = rescoreVector; } } - public enum VectorIndexType { + public interface VectorIndexType { + + String name(); + + IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion); + + boolean supportsElementType(ElementType elementType); + + boolean supportsDimension(int dims); + + boolean isQuantized(); + } + + public static Map allBasicVectorIndexTypes() { + return Stream.of(BasicVectorIndexType.values()) + .filter(vectorIndexType -> vectorIndexType != BasicVectorIndexType.BBQ_IVF || IVF_FORMAT.isEnabled()) + .collect(Collectors.toMap(type -> type.name, Function.identity())); + } + + public enum BasicVectorIndexType implements VectorIndexType { HNSW("hnsw", false) { @Override public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { @@ -1650,33 +1682,15 @@ public boolean supportsElementType(ElementType elementType) { return elementType == ElementType.FLOAT; } - @Override - 
public boolean supportsDimension(int dims) { - return true; - } - }, - GPU("gpu", false) { - @Override - public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { - MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); - return new GPUIndexOptions(); - } - - @Override - public boolean supportsElementType(ElementType elementType) { - return elementType == ElementType.FLOAT; - } - @Override public boolean supportsDimension(int dims) { return true; } }; - static Optional fromString(String type) { - return Stream.of(VectorIndexType.values()) - .filter(vectorIndexType -> vectorIndexType != VectorIndexType.BBQ_IVF || IVF_FORMAT.isEnabled()) - .filter(vectorIndexType -> vectorIndexType != VectorIndexType.GPU || GPU_FORMAT.isEnabled()) + static Optional fromString(String type) { + return Stream.of(BasicVectorIndexType.values()) + .filter(vectorIndexType -> vectorIndexType != BasicVectorIndexType.BBQ_IVF || IVF_FORMAT.isEnabled()) .filter(vectorIndexType -> vectorIndexType.name.equals(type)) .findFirst(); } @@ -1684,17 +1698,11 @@ static Optional fromString(String type) { private final String name; private final boolean quantized; - VectorIndexType(String name, boolean quantized) { + BasicVectorIndexType(String name, boolean quantized) { this.name = name; this.quantized = quantized; } - abstract IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion); - - public abstract boolean supportsElementType(ElementType elementType); - - public abstract boolean supportsDimension(int dims); - public boolean isQuantized() { return quantized; } @@ -1709,7 +1717,7 @@ static class Int8FlatIndexOptions extends QuantizedIndexOptions { private final Float confidenceInterval; Int8FlatIndexOptions(Float confidenceInterval, RescoreVector rescoreVector) { - super(VectorIndexType.INT8_FLAT, rescoreVector); + super(BasicVectorIndexType.INT8_FLAT, rescoreVector); this.confidenceInterval = 
confidenceInterval; } @@ -1728,38 +1736,38 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } @Override - KnnVectorsFormat getVectorsFormat(ElementType elementType) { + public KnnVectorsFormat getVectorsFormat(ElementType elementType) { assert elementType == ElementType.FLOAT; return new ES813Int8FlatVectorFormat(confidenceInterval, 7, false); } @Override - boolean doEquals(IndexOptions o) { + public boolean doEquals(IndexOptions o) { Int8FlatIndexOptions that = (Int8FlatIndexOptions) o; return Objects.equals(confidenceInterval, that.confidenceInterval) && Objects.equals(rescoreVector, that.rescoreVector); } @Override - int doHashCode() { + public int doHashCode() { return Objects.hash(confidenceInterval, rescoreVector); } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(IndexOptions update) { return update.type.equals(this.type) - || update.type.equals(VectorIndexType.HNSW) - || update.type.equals(VectorIndexType.INT8_HNSW) - || update.type.equals(VectorIndexType.INT4_HNSW) - || update.type.equals(VectorIndexType.BBQ_HNSW) - || update.type.equals(VectorIndexType.INT4_FLAT) - || update.type.equals(VectorIndexType.BBQ_FLAT); + || update.type.equals(BasicVectorIndexType.HNSW) + || update.type.equals(BasicVectorIndexType.INT8_HNSW) + || update.type.equals(BasicVectorIndexType.INT4_HNSW) + || update.type.equals(BasicVectorIndexType.BBQ_HNSW) + || update.type.equals(BasicVectorIndexType.INT4_FLAT) + || update.type.equals(BasicVectorIndexType.BBQ_FLAT); } } static class FlatIndexOptions extends IndexOptions { FlatIndexOptions() { - super(VectorIndexType.FLAT); + super(BasicVectorIndexType.FLAT); } @Override @@ -1771,7 +1779,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } @Override - KnnVectorsFormat getVectorsFormat(ElementType elementType) { + public KnnVectorsFormat getVectorsFormat(ElementType elementType) { if (elementType.equals(ElementType.BIT)) { return 
new ES815BitFlatVectorFormat(); } @@ -1779,7 +1787,7 @@ KnnVectorsFormat getVectorsFormat(ElementType elementType) { } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(IndexOptions update) { return true; } @@ -1794,49 +1802,13 @@ public int doHashCode() { } } - static class GPUIndexOptions extends IndexOptions { - - GPUIndexOptions() { - super(VectorIndexType.GPU); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); - builder.field("type", type); - builder.endObject(); - return builder; - } - - @Override - KnnVectorsFormat getVectorsFormat(ElementType elementType) { - assert elementType == ElementType.FLOAT; - return new GPUVectorsFormat(); - } - - @Override - boolean updatableTo(IndexOptions update) { - return false; - } - - @Override - public boolean doEquals(IndexOptions o) { - return o instanceof GPUIndexOptions; - } - - @Override - public int doHashCode() { - return Objects.hash(type); - } - } - static class Int4HnswIndexOptions extends QuantizedIndexOptions { private final int m; private final int efConstruction; private final float confidenceInterval; Int4HnswIndexOptions(int m, int efConstruction, Float confidenceInterval, RescoreVector rescoreVector) { - super(VectorIndexType.INT4_HNSW, rescoreVector); + super(BasicVectorIndexType.INT4_HNSW, rescoreVector); this.m = m; this.efConstruction = efConstruction; // The default confidence interval for int4 is dynamic quantiles, this provides the best relevancy and is @@ -1894,14 +1866,14 @@ public String toString() { } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(IndexOptions update) { boolean updatable = false; - if (update.type.equals(VectorIndexType.INT4_HNSW)) { + if (update.type.equals(BasicVectorIndexType.INT4_HNSW)) { Int4HnswIndexOptions int4HnswIndexOptions = (Int4HnswIndexOptions) update; // fewer connections would break assumptions on max 
number of connections (based on largest previous graph) during merge // quantization could not behave as expected with different confidence intervals (and quantiles) to be created updatable = int4HnswIndexOptions.m >= this.m && confidenceInterval == int4HnswIndexOptions.confidenceInterval; - } else if (update.type.equals(VectorIndexType.BBQ_HNSW)) { + } else if (update.type.equals(BasicVectorIndexType.BBQ_HNSW)) { updatable = ((BBQHnswIndexOptions) update).m >= m; } return updatable; @@ -1912,7 +1884,7 @@ static class Int4FlatIndexOptions extends QuantizedIndexOptions { private final float confidenceInterval; Int4FlatIndexOptions(Float confidenceInterval, RescoreVector rescoreVector) { - super(VectorIndexType.INT4_FLAT, rescoreVector); + super(BasicVectorIndexType.INT4_FLAT, rescoreVector); // The default confidence interval for int4 is dynamic quantiles, this provides the best relevancy and is // effectively required for int4 to behave well across a wide range of data. this.confidenceInterval = confidenceInterval == null ? 
0f : confidenceInterval; @@ -1955,14 +1927,14 @@ public String toString() { } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(IndexOptions update) { // TODO: add support for updating from flat, hnsw, and int8_hnsw and updating params return update.type.equals(this.type) - || update.type.equals(VectorIndexType.HNSW) - || update.type.equals(VectorIndexType.INT8_HNSW) - || update.type.equals(VectorIndexType.INT4_HNSW) - || update.type.equals(VectorIndexType.BBQ_HNSW) - || update.type.equals(VectorIndexType.BBQ_FLAT); + || update.type.equals(BasicVectorIndexType.HNSW) + || update.type.equals(BasicVectorIndexType.INT8_HNSW) + || update.type.equals(BasicVectorIndexType.INT4_HNSW) + || update.type.equals(BasicVectorIndexType.BBQ_HNSW) + || update.type.equals(BasicVectorIndexType.BBQ_FLAT); } } @@ -1973,7 +1945,7 @@ public static class Int8HnswIndexOptions extends QuantizedIndexOptions { private final Float confidenceInterval; public Int8HnswIndexOptions(int m, int efConstruction, Float confidenceInterval, RescoreVector rescoreVector) { - super(VectorIndexType.INT8_HNSW, rescoreVector); + super(BasicVectorIndexType.INT8_HNSW, rescoreVector); this.m = m; this.efConstruction = efConstruction; this.confidenceInterval = confidenceInterval; @@ -2033,7 +2005,7 @@ public String toString() { } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(IndexOptions update) { boolean updatable; if (update.type.equals(this.type)) { Int8HnswIndexOptions int8HnswIndexOptions = (Int8HnswIndexOptions) update; @@ -2044,8 +2016,8 @@ boolean updatableTo(IndexOptions update) { || int8HnswIndexOptions.confidenceInterval != null && confidenceInterval.equals(int8HnswIndexOptions.confidenceInterval); } else { - updatable = update.type.equals(VectorIndexType.INT4_HNSW) && ((Int4HnswIndexOptions) update).m >= this.m - || (update.type.equals(VectorIndexType.BBQ_HNSW) && ((BBQHnswIndexOptions) update).m >= m); + updatable = 
update.type.equals(BasicVectorIndexType.INT4_HNSW) && ((Int4HnswIndexOptions) update).m >= this.m + || (update.type.equals(BasicVectorIndexType.BBQ_HNSW) && ((BBQHnswIndexOptions) update).m >= m); } return updatable; } @@ -2056,7 +2028,7 @@ static class HnswIndexOptions extends IndexOptions { private final int efConstruction; HnswIndexOptions(int m, int efConstruction) { - super(VectorIndexType.HNSW); + super(BasicVectorIndexType.HNSW); this.m = m; this.efConstruction = efConstruction; } @@ -2070,7 +2042,7 @@ public KnnVectorsFormat getVectorsFormat(ElementType elementType) { } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(IndexOptions update) { boolean updatable = update.type.equals(this.type); if (updatable) { // fewer connections would break assumptions on max number of connections (based on largest previous graph) during merge @@ -2078,9 +2050,9 @@ boolean updatableTo(IndexOptions update) { updatable = hnswIndexOptions.m >= this.m; } return updatable - || (update.type.equals(VectorIndexType.INT8_HNSW) && ((Int8HnswIndexOptions) update).m >= m) - || (update.type.equals(VectorIndexType.INT4_HNSW) && ((Int4HnswIndexOptions) update).m >= m) - || (update.type.equals(VectorIndexType.BBQ_HNSW) && ((BBQHnswIndexOptions) update).m >= m); + || (update.type.equals(BasicVectorIndexType.INT8_HNSW) && ((Int8HnswIndexOptions) update).m >= m) + || (update.type.equals(BasicVectorIndexType.INT4_HNSW) && ((Int4HnswIndexOptions) update).m >= m) + || (update.type.equals(BasicVectorIndexType.BBQ_HNSW) && ((BBQHnswIndexOptions) update).m >= m); } @Override @@ -2117,30 +2089,30 @@ public static class BBQHnswIndexOptions extends QuantizedIndexOptions { private final int efConstruction; public BBQHnswIndexOptions(int m, int efConstruction, RescoreVector rescoreVector) { - super(VectorIndexType.BBQ_HNSW, rescoreVector); + super(BasicVectorIndexType.BBQ_HNSW, rescoreVector); this.m = m; this.efConstruction = efConstruction; } @Override - 
KnnVectorsFormat getVectorsFormat(ElementType elementType) { + public KnnVectorsFormat getVectorsFormat(ElementType elementType) { assert elementType == ElementType.FLOAT; return new ES818HnswBinaryQuantizedVectorsFormat(m, efConstruction); } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(IndexOptions update) { return update.type.equals(this.type) && ((BBQHnswIndexOptions) update).m >= this.m; } @Override - boolean doEquals(IndexOptions other) { + protected boolean doEquals(IndexOptions other) { BBQHnswIndexOptions that = (BBQHnswIndexOptions) other; return m == that.m && efConstruction == that.efConstruction && Objects.equals(rescoreVector, that.rescoreVector); } @Override - int doHashCode() { + protected int doHashCode() { return Objects.hash(m, efConstruction, rescoreVector); } @@ -2162,7 +2134,7 @@ public boolean validateDimension(int dim, boolean throwOnError) { boolean supportsDimension = type.supportsDimension(dim); if (throwOnError && supportsDimension == false) { throw new IllegalArgumentException( - type.name + " does not support dimensions fewer than " + BBQ_MIN_DIMS + "; provided=" + dim + type.name() + " does not support dimensions fewer than " + BBQ_MIN_DIMS + "; provided=" + dim ); } return supportsDimension; @@ -2173,27 +2145,27 @@ static class BBQFlatIndexOptions extends QuantizedIndexOptions { private final int CLASS_NAME_HASH = this.getClass().getName().hashCode(); BBQFlatIndexOptions(RescoreVector rescoreVector) { - super(VectorIndexType.BBQ_FLAT, rescoreVector); + super(BasicVectorIndexType.BBQ_FLAT, rescoreVector); } @Override - KnnVectorsFormat getVectorsFormat(ElementType elementType) { + public KnnVectorsFormat getVectorsFormat(ElementType elementType) { assert elementType == ElementType.FLOAT; return new ES818BinaryQuantizedVectorsFormat(); } @Override - boolean updatableTo(IndexOptions update) { - return update.type.equals(this.type) || update.type.equals(VectorIndexType.BBQ_HNSW); + public boolean 
updatableTo(IndexOptions update) { + return update.type.equals(this.type) || update.type.equals(BasicVectorIndexType.BBQ_HNSW); } @Override - boolean doEquals(IndexOptions other) { + protected boolean doEquals(IndexOptions other) { return other instanceof BBQFlatIndexOptions; } @Override - int doHashCode() { + protected int doHashCode() { return CLASS_NAME_HASH; } @@ -2213,7 +2185,7 @@ public boolean validateDimension(int dim, boolean throwOnError) { boolean supportsDimension = type.supportsDimension(dim); if (throwOnError && supportsDimension == false) { throw new IllegalArgumentException( - type.name + " does not support dimensions fewer than " + BBQ_MIN_DIMS + "; provided=" + dim + type.name() + " does not support dimensions fewer than " + BBQ_MIN_DIMS + "; provided=" + dim ); } return supportsDimension; @@ -2226,24 +2198,24 @@ static class BBQIVFIndexOptions extends QuantizedIndexOptions { final int defaultNProbe; BBQIVFIndexOptions(int clusterSize, int defaultNProbe, RescoreVector rescoreVector) { - super(VectorIndexType.BBQ_IVF, rescoreVector); + super(BasicVectorIndexType.BBQ_IVF, rescoreVector); this.clusterSize = clusterSize; this.defaultNProbe = defaultNProbe; } @Override - KnnVectorsFormat getVectorsFormat(ElementType elementType) { + public KnnVectorsFormat getVectorsFormat(ElementType elementType) { assert elementType == ElementType.FLOAT; return new IVFVectorsFormat(clusterSize); } @Override - boolean updatableTo(IndexOptions update) { + public boolean updatableTo(IndexOptions update) { return update.type.equals(this.type); } @Override - boolean doEquals(IndexOptions other) { + protected boolean doEquals(IndexOptions other) { BBQIVFIndexOptions that = (BBQIVFIndexOptions) other; return clusterSize == that.clusterSize && defaultNProbe == that.defaultNProbe @@ -2251,7 +2223,7 @@ boolean doEquals(IndexOptions other) { } @Override - int doHashCode() { + protected int doHashCode() { return Objects.hash(clusterSize, defaultNProbe, rescoreVector); } @@ 
-2304,10 +2276,14 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } } - public static final TypeParser PARSER = new TypeParser( - (n, c) -> new Builder(n, c.indexVersionCreated()), - notInMultiFields(CONTENT_TYPE) - ); + public static final TypeParser parser(Function denseVectorIndexType) { + return new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), denseVectorIndexType), notInMultiFields(CONTENT_TYPE)); + } + + // public static final TypeParser PARSER = new TypeParser( + // (n, c) -> new Builder(n, c.indexVersionCreated()), + // notInMultiFields(CONTENT_TYPE) + // ); public static final class DenseVectorFieldType extends SimpleMappedFieldType { private final ElementType elementType; @@ -2630,15 +2606,15 @@ && isNotUnitVector(squaredMagnitude)) { return knnQuery; } - VectorSimilarity getSimilarity() { + public VectorSimilarity getSimilarity() { return similarity; } - int getVectorDimensions() { + public int getVectorDimensions() { return dims; } - ElementType getElementType() { + public ElementType getElementType() { return elementType; } @@ -2692,17 +2668,20 @@ public List fetchValues(Source source, int doc, List ignoredValu private final IndexOptions indexOptions; private final IndexVersion indexCreatedVersion; + private final Function denseVectorIndexType; private DenseVectorFieldMapper( String simpleName, MappedFieldType mappedFieldType, BuilderParams params, IndexOptions indexOptions, - IndexVersion indexCreatedVersion + IndexVersion indexCreatedVersion, + Function denseVectorIndexType ) { super(simpleName, mappedFieldType, params); this.indexOptions = indexOptions; this.indexCreatedVersion = indexCreatedVersion; + this.denseVectorIndexType = denseVectorIndexType; } @Override @@ -2750,7 +2729,8 @@ public void parse(DocumentParserContext context) throws IOException { updatedDenseVectorFieldType, builderParams, indexOptions, - indexCreatedVersion + indexCreatedVersion, + denseVectorIndexType ); 
context.addDynamicMapper(update); return; @@ -2842,10 +2822,15 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion).init(this); + return new Builder(leafName(), indexCreatedVersion, denseVectorIndexType).init(this); } - private static IndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) { + private static IndexOptions parseIndexOptions( + String fieldName, + Object propNode, + IndexVersion indexVersion, + Function denseVectorIndexType + ) { @SuppressWarnings("unchecked") Map indexOptionsMap = (Map) propNode; Object typeNode = indexOptionsMap.remove("type"); @@ -2853,7 +2838,7 @@ private static IndexOptions parseIndexOptions(String fieldName, Object propNode, throw new MapperParsingException("[index_options] requires field [type] to be configured"); } String type = XContentMapValues.nodeStringValue(typeNode); - Optional vectorIndexType = VectorIndexType.fromString(type); + Optional vectorIndexType = Optional.ofNullable(denseVectorIndexType.apply(type)); if (vectorIndexType.isEmpty()) { throw new MapperParsingException("Unknown vector index options type [" + type + "] for field [" + fieldName + "]"); } diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java index 09be98630d5c4..2598fcc868496 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -95,11 +95,13 @@ public class IndicesModule extends AbstractModule { private final MapperRegistry mapperRegistry; public IndicesModule(List mapperPlugins) { + var denseVectorIndexTypes = getDenseVectorIndexTypes(mapperPlugins); this.mapperRegistry = new MapperRegistry( - getMappers(mapperPlugins), + getMappers(mapperPlugins, denseVectorIndexTypes), getRuntimeFields(mapperPlugins), 
getMetadataMappers(mapperPlugins), - getFieldFilter(mapperPlugins) + getFieldFilter(mapperPlugins), + denseVectorIndexTypes ); } @@ -179,7 +181,10 @@ public static List getNamedXContents() { ); } - public static Map getMappers(List mapperPlugins) { + public static Map getMappers( + List mapperPlugins, + Map denseVectorIndexTypes + ) { Map mappers = new LinkedHashMap<>(); // builtin mappers @@ -208,7 +213,7 @@ public static Map getMappers(List mappe mappers.put(PassThroughObjectMapper.CONTENT_TYPE, new PassThroughObjectMapper.TypeParser()); mappers.put(TextFieldMapper.CONTENT_TYPE, TextFieldMapper.PARSER); - mappers.put(DenseVectorFieldMapper.CONTENT_TYPE, DenseVectorFieldMapper.PARSER); + mappers.put(DenseVectorFieldMapper.CONTENT_TYPE, DenseVectorFieldMapper.parser(denseVectorIndexTypes::get)); mappers.put(SparseVectorFieldMapper.CONTENT_TYPE, SparseVectorFieldMapper.PARSER); for (MapperPlugin mapperPlugin : mapperPlugins) { @@ -221,6 +226,21 @@ public static Map getMappers(List mappe return Collections.unmodifiableMap(mappers); } + private static Map getDenseVectorIndexTypes(List mapperPlugins) { + Map indexTypes = new LinkedHashMap<>( + DenseVectorFieldMapper.allBasicVectorIndexTypes() + ); + + for (MapperPlugin mapperPlugin : mapperPlugins) { + for (var entry : mapperPlugin.getDenseVectorIndexTypes().entrySet()) { + if (indexTypes.put(entry.getKey(), entry.getValue()) != null) { + throw new IllegalArgumentException("DenseVectorIndexType [" + entry.getKey() + "] is already registered"); + } + } + } + return Collections.unmodifiableMap(indexTypes); + } + private static Map getRuntimeFields(List mapperPlugins) { Map runtimeParsers = new LinkedHashMap<>(); runtimeParsers.put(BooleanFieldMapper.CONTENT_TYPE, BooleanScriptFieldType.PARSER); diff --git a/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java b/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java index c82bc286a90c8..9d5679d0b3eaa 100644 --- 
a/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java +++ b/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java @@ -12,6 +12,7 @@ import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MetadataFieldMapper; import org.elasticsearch.index.mapper.RuntimeField; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import java.util.Collections; import java.util.Map; @@ -33,6 +34,18 @@ default Map getMappers() { return Collections.emptyMap(); } + /** + * Returns the dense vector index types added by this plugin, keyed by their index_options type name. + */ + // default Map getDenseVectorMappers() { + // default Map getDenseVectorMappers() { + // return Collections.emptyMap(); + // } + + default Map getDenseVectorIndexTypes() { + return Collections.emptyMap(); + } + /** * Returns the runtime field implementations added by this plugin. *

diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat index 97a95478c5fea..14e68029abc3b 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -8,4 +8,3 @@ org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsForma org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.IVFVectorsFormat -org.elasticsearch.index.codec.vectors.GPUVectorsFormat diff --git a/server/src/test/java/org/elasticsearch/ReleaseVersionsTests.java b/server/src/test/java/org/elasticsearch/ReleaseVersionsTests.java index 21ef95a36e3d3..7d027833fa12e 100644 --- a/server/src/test/java/org/elasticsearch/ReleaseVersionsTests.java +++ b/server/src/test/java/org/elasticsearch/ReleaseVersionsTests.java @@ -34,4 +34,16 @@ public void testReturnsRange() { assertThat(versions.apply(9), equalTo("0.0.0")); assertThat(versions.apply(24), equalTo(new Version(Version.CURRENT.id + 100) + "-[24]")); } + + // public void testFoo() { + // + // String s = IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED.toReleaseVersion().split("-")[0]; + // //var s = IndexVersions.VERSION_LOOKUP.apply(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED.id()); + // System.out.println("MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED=" + s); + // + // s = IndexVersions.UPGRADE_TO_LUCENE_10_2_2.toReleaseVersion().split("-")[0]; + // // s = IndexVersions.VERSION_LOOKUP.apply(IndexVersions.UPGRADE_TO_LUCENE_10_2_2.id()); + // System.out.println("UPGRADE_TO_LUCENE_10_2_2=" + s); + // + // } } diff --git 
a/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java index 074c495e53db2..cdb14e8cc9df4 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java @@ -299,7 +299,8 @@ private IndexMetadataVerifier getIndexMetadataVerifier() { Settings.EMPTY, null, xContentRegistry(), - new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER), + new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER, + Collections.emptyMap()), IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, null, MapperMetrics.NOOP diff --git a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java index 691ca7682f30c..6739c2c830e04 100644 --- a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java +++ b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java @@ -857,7 +857,8 @@ public void testIndexMapperDynamic() { Settings.EMPTY, null, xContentRegistry(), - new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER), + new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER, + Collections.emptyMap()), IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, null, MapperMetrics.NOOP diff --git a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java index 23ee616d54231..40d0cfbd69b53 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -128,7 +128,8 @@ private CodecService createCodecService() throws IOException { Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), - MapperPlugin.NOOP_FIELD_FILTER + MapperPlugin.NOOP_FIELD_FILTER, + Collections.emptyMap() ); BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(settings, BitsetFilterCache.Listener.NOOP); MapperService service = new MapperService( diff --git a/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java index 4b674cf1985b2..dc6e46a1970c5 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java @@ -49,6 +49,7 @@ private static MappingParser createMappingParser(Settings settings, IndexVersion similarityService::getSimilarity, type -> mapperRegistry.getMapperParser(type, indexSettings.getIndexVersionCreated()), mapperRegistry.getRuntimeFieldParsers()::get, + mapperRegistry::getDenseVectorIndexType, indexSettings.getIndexVersionCreated(), () -> transportVersion, () -> { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java index a161bcbc5d6d2..1dc3fa3dd87cd 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java @@ -263,6 +263,7 @@ private static TestMapper fromMapping( return null; }, name -> null, + type -> null, version, () -> transportVersion, () -> null, diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java index 9a30e7d696b68..416c7019a8dae 100644 --- 
a/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java @@ -97,6 +97,7 @@ public void testMultiFieldWithinMultiField() throws IOException { null, type -> typeParser, type -> null, + type -> null, version, () -> transportVersion, null, diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index ef52a3a099879..46bb014441100 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -19,8 +19,6 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.lucene.search.FieldExistsQuery; -import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.VectorUtil; import org.elasticsearch.common.bytes.BytesReference; @@ -32,14 +30,13 @@ import org.elasticsearch.index.codec.LegacyPerFieldMapperCodec; import org.elasticsearch.index.codec.PerFieldMapperCodec; import org.elasticsearch.index.codec.vectors.IVFVectorsFormat; +import org.elasticsearch.index.mapper.AbstractDenseVectorFieldMapperTestcase; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentParsingException; -import org.elasticsearch.index.mapper.LuceneDocument; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MapperService; -import org.elasticsearch.index.mapper.MapperTestCase; import org.elasticsearch.index.mapper.ParsedDocument; import 
org.elasticsearch.index.mapper.SourceToParse; import org.elasticsearch.index.mapper.ValueFetcher; @@ -66,7 +63,6 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_NPROBE; -import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.GPU_FORMAT; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -74,1316 +70,9 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -public class DenseVectorFieldMapperTests extends MapperTestCase { +public class DenseVectorFieldMapperTests extends AbstractDenseVectorFieldMapperTestcase { - private static final IndexVersion INDEXED_BY_DEFAULT_PREVIOUS_INDEX_VERSION = IndexVersions.V_8_10_0; - private final ElementType elementType; - private final boolean indexed; - private final boolean indexOptionsSet; - private final int dims; - - public DenseVectorFieldMapperTests() { - this.elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT); - this.indexed = randomBoolean(); - this.indexOptionsSet = this.indexed && randomBoolean(); - this.dims = ElementType.BIT == elementType ? 
4 * Byte.SIZE : 4; - } - - @Override - protected void minimalMapping(XContentBuilder b) throws IOException { - indexMapping(b, IndexVersion.current()); - } - - @Override - protected void minimalMapping(XContentBuilder b, IndexVersion indexVersion) throws IOException { - indexMapping(b, indexVersion); - } - - private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws IOException { - b.field("type", "dense_vector").field("dims", dims); - if (elementType != ElementType.FLOAT) { - b.field("element_type", elementType.toString()); - } - if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) { - // Serialize if it's new index version, or it was not the default for previous indices - b.field("index", indexed); - } - if (indexVersion.onOrAfter(DenseVectorFieldMapper.DEFAULT_TO_INT8) - && indexed - && elementType.equals(ElementType.FLOAT) - && indexOptionsSet == false) { - b.startObject("index_options"); - b.field("type", "int8_hnsw"); - b.field("m", 16); - b.field("ef_construction", 100); - b.endObject(); - } - if (indexed) { - b.field("similarity", elementType == ElementType.BIT ? "l2_norm" : "dot_product"); - if (indexOptionsSet) { - b.startObject("index_options"); - b.field("type", "hnsw"); - b.field("m", 5); - b.field("ef_construction", 50); - b.endObject(); - } - } - } - - @Override - protected Object getSampleValueForDocument() { - return elementType == ElementType.FLOAT ? 
List.of(0.5, 0.5, 0.5, 0.5) : List.of((byte) 1, (byte) 1, (byte) 1, (byte) 1); - } - - @Override - protected void registerParameters(ParameterChecker checker) throws IOException { - checker.registerConflictCheck( - "dims", - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims)), - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims + 8)) - ); - checker.registerConflictCheck( - "similarity", - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", true).field("similarity", "dot_product")), - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", true).field("similarity", "l2_norm")) - ); - checker.registerConflictCheck( - "index", - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", true).field("similarity", "dot_product")), - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", false)) - ); - checker.registerConflictCheck( - "element_type", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .field("similarity", "dot_product") - .field("element_type", "byte") - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .field("similarity", "dot_product") - .field("element_type", "float") - ) - ); - checker.registerConflictCheck( - "element_type", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .field("similarity", "l2_norm") - .field("element_type", "float") - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 8) - .field("index", true) - .field("similarity", "l2_norm") - .field("element_type", "bit") - ) - ); - checker.registerConflictCheck( - "element_type", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .field("similarity", "l2_norm") - .field("element_type", 
"byte") - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 8) - .field("index", true) - .field("similarity", "l2_norm") - .field("element_type", "bit") - ) - ); - // update for flat - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int8_flat\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_flat\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", 
dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_flat\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for int8_flat - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - 
.endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_flat\"")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_flat\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - 
.endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for hnsw - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .field("m", 100) - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - 
checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .field("m", 16) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) 
- .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for int8_hnsw - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .field("m", 256) - .endObject(), - m -> assertTrue(m.toString().contains("\"m\":256")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("m", 256) - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .field("m", 16) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - 
fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for int4_flat - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - 
.startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", 
true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_flat\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for int4_hnsw - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("m", 256) - .field("type", "int4_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"m\":256")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("confidence_interval", 0.03) - .field("m", 4) - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("confidence_interval", 0.03) - .field("m", 100) - .endObject(), - m -> assertTrue(m.toString().contains("\"m\":100")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("m", 32) - 
.endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("m", 16) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("confidence_interval", 0.3) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .field("m", 16) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .field("m", 16) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - 
.endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for bbq_flat - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject(), - b -> b.field("type", 
"dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - 
fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ) - ); - // update for bbq_hnsw - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - 
.field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ) - ); - } - - @Override - protected boolean supportsStoredFields() { - return false; - } - - @Override - protected boolean supportsIgnoreMalformed() { - return false; - } - - @Override - protected void assertSearchable(MappedFieldType fieldType) { - assertThat(fieldType, instanceOf(DenseVectorFieldType.class)); - assertEquals(fieldType.isIndexed(), indexed); - assertEquals(fieldType.isSearchable(), indexed); - } - - protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) { - assertThat(query, instanceOf(FieldExistsQuery.class)); 
- FieldExistsQuery existsQuery = (FieldExistsQuery) query; - assertEquals("field", existsQuery.getField()); - assertNoFieldNamesField(fields); - } - - // We override this because dense vectors are the only field type that are not aggregatable but - // that do provide fielddata. TODO: resolve this inconsistency! - @Override - public void testAggregatableConsistency() {} - - public void testGPUParsing() throws IOException { - assumeTrue("feature flag [gpu_format] must be enabled", GPU_FORMAT.isEnabled()); - DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> { - b.field("type", "dense_vector"); - b.field("dims", 128); - b.field("index", true); - b.field("similarity", "dot_product"); - b.startObject("index_options"); - b.field("type", "gpu"); - b.endObject(); - })); - DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field"); - DenseVectorFieldMapper.GPUIndexOptions indexOptions = (DenseVectorFieldMapper.GPUIndexOptions) denseVectorFieldMapper.fieldType() - .getIndexOptions(); - // TODO: finish tests - } - - public void testGPUParsingFailureInRelease() { - assumeFalse("feature flag [gpu_format] must be disabled", GPU_FORMAT.isEnabled()); - - Exception e = expectThrows( - MapperParsingException.class, - () -> createDocumentMapper( - fieldMapping( - b -> b.field("type", "dense_vector").field("dims", dims).startObject("index_options").field("type", "gpu").endObject() - ) - ) - ); - assertThat(e.getMessage(), containsString("Unknown vector index options")); - } - - public void testKnnGPUVectorsFormat() throws IOException { - assumeTrue("feature flag [gpu_format] must be enabled", GPU_FORMAT.isEnabled()); - final int dims = randomIntBetween(64, 4096); - MapperService mapperService = createMapperService(fieldMapping(b -> { - b.field("type", "dense_vector"); - b.field("dims", dims); - b.field("index", true); - b.field("similarity", "dot_product"); - b.startObject("index_options"); - b.field("type", 
"gpu"); - b.endObject(); - })); - CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); - Codec codec = codecService.codec("default"); - KnnVectorsFormat knnVectorsFormat; - if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { - assertThat(codec, instanceOf(PerFieldMapperCodec.class)); - knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); - } else { - if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { - codec = deduplicateFieldInfosCodec.delegate(); - } - assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); - knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); - } - String expectedString = "GPUVectorsFormat()"; - assertEquals(expectedString, knnVectorsFormat.toString()); - } + public DenseVectorFieldMapperTests() {} public void testIVFParsing() throws IOException { assumeTrue("feature flag [ivf_format] must be enabled", IVF_FORMAT.isEnabled()); @@ -2100,7 +789,9 @@ public void testValidateOnBuild() { final MapperBuilderContext context = MapperBuilderContext.root(false, false); // Build a dense vector field mapper with float element type, which will trigger int8 HNSW index options - DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current()).elementType(ElementType.FLOAT) + DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current(), + type -> DenseVectorFieldMapper.allBasicVectorIndexTypes().get(type)) + .elementType(ElementType.FLOAT) .build(context); // Change the element type to byte, which is incompatible with int8 HNSW index options diff --git a/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java b/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java index 0c31ab703862f..f61f86c2043a4 100644 --- 
a/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java @@ -543,6 +543,7 @@ private static MapperService createMapperService(IndexSettings indexSettings, Ma null, type -> mapperRegistry.getMapperParser(type, indexSettings.getIndexVersionCreated()), mapperRegistry.getRuntimeFieldParsers()::get, + mapperRegistry::getDenseVectorIndexType, indexSettings.getIndexVersionCreated(), () -> TransportVersion.current(), searchExecutionContextSupplier, diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java new file mode 100644 index 0000000000000..e8a630b4f5d9b --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java @@ -0,0 +1,1432 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.mapper; + + +import org.apache.lucene.search.FieldExistsQuery; +import org.apache.lucene.search.Query; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DenseVectorFieldType; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.ElementType; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.VectorSimilarity; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.search.lookup.Source; +import org.elasticsearch.search.lookup.SourceProvider; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.junit.AssumptionViolatedException; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public abstract class AbstractDenseVectorFieldMapperTestcase extends MapperTestCase { + + protected static final IndexVersion INDEXED_BY_DEFAULT_PREVIOUS_INDEX_VERSION = IndexVersions.V_8_10_0; + protected final ElementType elementType; + protected final boolean indexed; + protected final boolean indexOptionsSet; + protected final int dims; + + protected AbstractDenseVectorFieldMapperTestcase() { + this.elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT); + this.indexed = randomBoolean(); + this.indexOptionsSet = this.indexed && randomBoolean(); + this.dims = ElementType.BIT == elementType ? 
4 * Byte.SIZE : 4; + } + + @Override + protected void minimalMapping(XContentBuilder b) throws IOException { + indexMapping(b, IndexVersion.current()); + } + + @Override + protected void minimalMapping(XContentBuilder b, IndexVersion indexVersion) throws IOException { + indexMapping(b, indexVersion); + } + + protected void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws IOException { + b.field("type", "dense_vector").field("dims", dims); + if (elementType != ElementType.FLOAT) { + b.field("element_type", elementType.toString()); + } + if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) { + // Serialize if it's new index version, or it was not the default for previous indices + b.field("index", indexed); + } + if (indexVersion.onOrAfter(DenseVectorFieldMapper.DEFAULT_TO_INT8) + && indexed + && elementType.equals(ElementType.FLOAT) + && indexOptionsSet == false) { + b.startObject("index_options"); + b.field("type", "int8_hnsw"); + b.field("m", 16); + b.field("ef_construction", 100); + b.endObject(); + } + if (indexed) { + b.field("similarity", elementType == ElementType.BIT ? "l2_norm" : "dot_product"); + if (indexOptionsSet) { + b.startObject("index_options"); + b.field("type", "hnsw"); + b.field("m", 5); + b.field("ef_construction", 50); + b.endObject(); + } + } + } + + @Override + protected Object getSampleValueForDocument() { + return elementType == ElementType.FLOAT ? 
List.of(0.5, 0.5, 0.5, 0.5) : List.of((byte) 1, (byte) 1, (byte) 1, (byte) 1); + } + + @Override + protected void registerParameters(ParameterChecker checker) throws IOException { + checker.registerConflictCheck( + "dims", + fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims)), + fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims + 8)) + ); + checker.registerConflictCheck( + "similarity", + fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", true).field("similarity", "dot_product")), + fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", true).field("similarity", "l2_norm")) + ); + checker.registerConflictCheck( + "index", + fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", true).field("similarity", "dot_product")), + fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", false)) + ); + checker.registerConflictCheck( + "element_type", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .field("similarity", "dot_product") + .field("element_type", "byte") + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .field("similarity", "dot_product") + .field("element_type", "float") + ) + ); + checker.registerConflictCheck( + "element_type", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .field("similarity", "l2_norm") + .field("element_type", "float") + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 8) + .field("index", true) + .field("similarity", "l2_norm") + .field("element_type", "bit") + ) + ); + checker.registerConflictCheck( + "element_type", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .field("similarity", "l2_norm") + .field("element_type", 
"byte") + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 8) + .field("index", true) + .field("similarity", "l2_norm") + .field("element_type", "bit") + ) + ); + // update for flat + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int8_flat\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_flat\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", 
dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"bbq_flat\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) + ); + // update for int8_flat + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + 
.endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_flat\"")) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject() + ) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"bbq_flat\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + 
.endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) + ); + // update for hnsw + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .field("m", 100) + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject() + ) + ); + 
checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .field("m", 16) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject() + ) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) 
+ .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) + ); + // update for int8_hnsw + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .field("m", 256) + .endObject(), + m -> assertTrue(m.toString().contains("\"m\":256")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("m", 256) + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .field("m", 16) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + 
fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject() + ) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) + ); + // update for int4_flat + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + 
.startObject("index_options") + .field("type", "int4_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject() + ) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", 
true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"bbq_flat\"")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) + ); + // update for int4_hnsw + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("m", 256) + .field("type", "int4_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"m\":256")) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("confidence_interval", 0.03) + .field("m", 4) + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("confidence_interval", 0.03) + .field("m", 100) + .endObject(), + m -> assertTrue(m.toString().contains("\"m\":100")) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("m", 32) + 
.endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("m", 16) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("confidence_interval", 0.3) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .field("m", 16) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .field("m", 32) + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .field("m", 16) + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + 
.endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject() + ) + ); + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject(), + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) + ); + // update for bbq_flat + checker.registerUpdateCheck( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject(), + b -> b.field("type", 
"dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject(), + m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + 
fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_flat") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject() + ) + ); + // update for bbq_hnsw + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int8_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + 
.field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int4_flat") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int8_hnsw") + .endObject() + ) + ); + checker.registerConflictCheck( + "index_options", + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "bbq_hnsw") + .endObject() + ), + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", dims * 16) + .field("index", true) + .startObject("index_options") + .field("type", "int4_hnsw") + .endObject() + ) + ); + } + + @Override + protected boolean supportsStoredFields() { + return false; + } + + @Override + protected boolean supportsIgnoreMalformed() { + return false; + } + + @Override + protected void assertSearchable(MappedFieldType fieldType) { + assertThat(fieldType, instanceOf(DenseVectorFieldType.class)); + assertEquals(fieldType.isIndexed(), indexed); + assertEquals(fieldType.isSearchable(), indexed); + } + + protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) { + assertThat(query, instanceOf(FieldExistsQuery.class)); 
+ FieldExistsQuery existsQuery = (FieldExistsQuery) query; + assertEquals("field", existsQuery.getField()); + assertNoFieldNamesField(fields); + } + + // We override this because dense vectors are the only field type that are not aggregatable but + // that do provide fielddata. TODO: resolve this inconsistency! + @Override + public void testAggregatableConsistency() {} + + + @Override + protected void assertFetchMany(MapperService mapperService, String field, Object value, String format, int count) throws IOException { + assumeFalse("Dense vectors currently don't support multiple values in the same field", false); + } + + /** + * Dense vectors don't support doc values or string representation (for doc value parser/fetching). + * We may eventually support that, but until then, we only verify that the parsing and fields fetching matches the provided value object + */ + @Override + protected void assertFetch(MapperService mapperService, String field, Object value, String format) throws IOException { + MappedFieldType ft = mapperService.fieldType(field); + MappedFieldType.FielddataOperation fdt = MappedFieldType.FielddataOperation.SEARCH; + SourceToParse source = source(b -> b.field(ft.name(), value)); + SearchExecutionContext searchExecutionContext = mock(SearchExecutionContext.class); + when(searchExecutionContext.isSourceEnabled()).thenReturn(true); + when(searchExecutionContext.sourcePath(field)).thenReturn(Set.of(field)); + when(searchExecutionContext.getForField(ft, fdt)).thenAnswer(inv -> fieldDataLookup(mapperService).apply(ft, () -> { + throw new UnsupportedOperationException(); + }, fdt)); + ValueFetcher nativeFetcher = ft.valueFetcher(searchExecutionContext, format); + ParsedDocument doc = mapperService.documentMapper().parse(source); + withLuceneIndex(mapperService, iw -> iw.addDocuments(doc.docs()), ir -> { + Source s = SourceProvider.fromLookup(mapperService.mappingLookup(), null, mapperService.getMapperMetrics().sourceFieldMetrics()) + 
.getSource(ir.leaves().get(0), 0); + nativeFetcher.setNextReader(ir.leaves().get(0)); + List fromNative = nativeFetcher.fetchValues(s, 0, new ArrayList<>()); + DenseVectorFieldType denseVectorFieldType = (DenseVectorFieldType) ft; + switch (denseVectorFieldType.getElementType()) { + case BYTE -> { + assumeFalse("byte element type testing not currently added", false); + } + case FLOAT -> { + float[] fetchedFloats = new float[denseVectorFieldType.getVectorDimensions()]; + int i = 0; + for (var f : fromNative) { + assert f instanceof Number; + fetchedFloats[i++] = ((Number) f).floatValue(); + } + assertThat("fetching " + value, fetchedFloats, equalTo(value)); + } + } + }); + } + + @Override + // TODO: add `byte` element_type tests + protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException { + b.field("type", "dense_vector").field("dims", randomIntBetween(2, 4096)).field("element_type", "float"); + if (randomBoolean()) { + b.field("index", true).field("similarity", randomFrom(VectorSimilarity.values()).toString()); + } + } + + @Override + protected Object generateRandomInputValue(MappedFieldType ft) { + DenseVectorFieldType vectorFieldType = (DenseVectorFieldType) ft; + return switch (vectorFieldType.getElementType()) { + case BYTE -> randomByteArrayOfLength(vectorFieldType.getVectorDimensions()); + case FLOAT -> { + float[] floats = new float[vectorFieldType.getVectorDimensions()]; + float magnitude = 0; + for (int i = 0; i < floats.length; i++) { + float f = randomFloat(); + floats[i] = f; + magnitude += f * f; + } + magnitude = (float) Math.sqrt(magnitude); + if (VectorSimilarity.DOT_PRODUCT.equals(vectorFieldType.getSimilarity())) { + for (int i = 0; i < floats.length; i++) { + floats[i] /= magnitude; + } + } + yield floats; + } + case BIT -> randomByteArrayOfLength(vectorFieldType.getVectorDimensions() / 8); + }; + } + + @Override + protected IngestScriptSupport ingestScriptSupport() { + throw new AssumptionViolatedException("not 
supported"); + } + + @Override + protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) { + return new DenseVectorSyntheticSourceSupport(); + } + + @Override + protected boolean supportsEmptyInputArray() { + return false; + } + + private static class DenseVectorSyntheticSourceSupport implements SyntheticSourceSupport { + private final int dims = between(5, 1000); + private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT); + private final boolean indexed = randomBoolean(); + private final boolean indexOptionsSet = indexed && randomBoolean(); + + @Override + public SyntheticSourceExample example(int maxValues) throws IOException { + Object value = switch (elementType) { + case BYTE, BIT: + yield randomList(dims, dims, ESTestCase::randomByte); + case FLOAT: + yield randomList(dims, dims, ESTestCase::randomFloat); + }; + return new SyntheticSourceExample(value, value, this::mapping); + } + + private void mapping(XContentBuilder b) throws IOException { + b.field("type", "dense_vector"); + if (elementType == ElementType.BYTE || elementType == ElementType.BIT || randomBoolean()) { + b.field("element_type", elementType.toString()); + } + b.field("dims", elementType == ElementType.BIT ? dims * Byte.SIZE : dims); + if (indexed) { + b.field("index", true); + b.field("similarity", "l2_norm"); + if (indexOptionsSet) { + b.startObject("index_options"); + b.field("type", "hnsw"); + b.field("m", 5); + b.field("ef_construction", 50); + b.endObject(); + } + } else { + b.field("index", false); + } + } + + @Override + public List invalidExample() { + return List.of(); + } + } + + @Override + public void testSyntheticSourceKeepArrays() { + // The mapper expects to parse an array of values by default, it's not compatible with array of arrays. 
+ } +} diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java index 49fe9d30239ae..6290287dd2798 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java @@ -58,6 +58,7 @@ private TestDocumentParserContext(MappingLookup mappingLookup, SourceToParse sou s -> null, s -> null, s -> null, + s -> null, IndexVersion.current(), () -> TransportVersion.current(), () -> null, diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 5e451e2e79f10..ceb1462a5e961 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -1197,7 +1197,7 @@ public void testSupportedFieldTypes() throws IOException { return; } - for (Map.Entry mappedType : IndicesModule.getMappers(List.of()).entrySet()) { + for (Map.Entry mappedType : IndicesModule.getMappers(List.of(), Map.of()).entrySet()) { // Some field types should not be tested, or require more work and are not ready yet if (TYPE_TEST_BLACKLIST.contains(mappedType.getKey())) { @@ -1402,6 +1402,7 @@ private static class MockParserContext extends MappingParserContext { null, null, null, + null, IndexVersion.current(), () -> TransportVersion.current(), null, diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle new file mode 100644 index 0000000000000..686ac2254492e --- /dev/null +++ b/x-pack/plugin/gpu/build.gradle @@ -0,0 +1,21 @@ +apply plugin: 'elasticsearch.internal-es-plugin' +apply plugin: 'elasticsearch.internal-cluster-test' +esplugin { + name = 
'gpu' + description = 'A plugin for GPU functionality' + classname = 'org.elasticsearch.xpack.gpu.GPUPlugin' +} +base { + archivesName = 'x-pack-gpu' +} + +dependencies { +// compileOnly project(path: xpackModule('core')) +// testImplementation(testArtifact(project(xpackModule('core')))) + + compileOnly project(':server') + + implementation "org.apache.lucene:lucene-core:${versions.lucene}" + + testImplementation project(':test:framework') +} diff --git a/x-pack/plugin/gpu/src/main/java/module-info.java b/x-pack/plugin/gpu/src/main/java/module-info.java new file mode 100644 index 0000000000000..67c76e0507a56 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/module-info.java @@ -0,0 +1,16 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +/** Provides GPU-accelerated support for vector search. */ +module org.elasticsearch.gpu { + requires org.apache.lucene.core; + requires org.elasticsearch.xcontent; + requires org.elasticsearch.server; + requires org.elasticsearch.base; + + provides org.apache.lucene.codecs.KnnVectorsFormat with org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java new file mode 100644 index 0000000000000..216e2639c2798 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -0,0 +1,107 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ +package org.elasticsearch.xpack.gpu; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.elasticsearch.common.util.FeatureFlag; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.mapper.MappingParser; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.plugins.MapperPlugin; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +public class GPUPlugin extends Plugin implements MapperPlugin { + + public static final FeatureFlag GPU_FORMAT = new FeatureFlag("gpu_format"); + + public Map getDenseVectorIndexTypes() { + if (GPU_FORMAT.isEnabled()) { + return Map.of(GPU_INDEX_TYPE_NAME, GPU_INDEX_TYPE); + } else { + return Map.of(); + } + } + + static final String GPU_INDEX_TYPE_NAME = "gpu"; + + static final GPUIndexType GPU_INDEX_TYPE = new GPUIndexType(); + + static class GPUIndexType implements DenseVectorFieldMapper.VectorIndexType { + + @Override + public String name() { + return GPU_INDEX_TYPE_NAME; + } + + @Override + public DenseVectorFieldMapper.IndexOptions parseIndexOptions( + String fieldName, + Map indexOptionsMap, + IndexVersion indexVersion + ) { + MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); + return new GPUIndexOptions(); + } + + @Override + public boolean supportsElementType(DenseVectorFieldMapper.ElementType elementType) { + return elementType == DenseVectorFieldMapper.ElementType.FLOAT; + } + + @Override + public boolean supportsDimension(int dims) { + return true; + } + + @Override + public boolean isQuantized() { + return false; + } + } + + static class GPUIndexOptions extends DenseVectorFieldMapper.IndexOptions { + + GPUIndexOptions() { + super(GPU_INDEX_TYPE); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) 
throws IOException { + builder.startObject(); + builder.field("type", type.name()); + builder.endObject(); + return builder; + } + + @Override + public KnnVectorsFormat getVectorsFormat(DenseVectorFieldMapper.ElementType elementType) { + assert elementType == DenseVectorFieldMapper.ElementType.FLOAT; + return new GPUVectorsFormat(); + } + + @Override + public boolean updatableTo(DenseVectorFieldMapper.IndexOptions update) { + return false; + } + + @Override + protected boolean doEquals(DenseVectorFieldMapper.IndexOptions o) { + return o instanceof GPUIndexOptions; + } + + @Override + protected int doHashCode() { + return Objects.hash(type); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java similarity index 84% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormat.java rename to x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java index 17343f7a934dc..b512ec7582592 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java @@ -1,13 +1,11 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.xpack.gpu.codec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsReader.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsReader.java similarity index 95% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsReader.java rename to x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsReader.java index db40e07f53ef7..ae426916efd62 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsReader.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsReader.java @@ -1,13 +1,11 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.xpack.gpu.codec; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java similarity index 93% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsWriter.java rename to x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java index 0245105a712b1..cbca1480c3d3a 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/GPUVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java @@ -1,13 +1,11 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.xpack.gpu.codec; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnFieldVectorsWriter; diff --git a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat new file mode 100644 index 0000000000000..00aa6aa7a153c --- /dev/null +++ b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -0,0 +1,2 @@ + +org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java new file mode 100644 index 0000000000000..550e26ebf5ad3 --- /dev/null +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java @@ -0,0 +1,103 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.gpu.codec; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.index.codec.CodecService; +import org.elasticsearch.index.codec.LegacyPerFieldMapperCodec; +import org.elasticsearch.index.codec.PerFieldMapperCodec; +import org.elasticsearch.index.mapper.AbstractDenseVectorFieldMapperTestcase; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.xpack.gpu.GPUPlugin; +import org.junit.Before; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; + +import static org.elasticsearch.xpack.gpu.GPUPlugin.GPU_FORMAT; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +public class GPUDenseVectorFieldMapperTests extends AbstractDenseVectorFieldMapperTestcase { + + @Before + public void setup() { + assumeTrue("feature flag [gpu_format] must be enabled", GPU_FORMAT.isEnabled()); + } + + @Override + protected Collection getPlugins() { + var plugin = new GPUPlugin(); + return Collections.singletonList(plugin); + } + + public void testGPUParsing() throws IOException { + DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> { + b.field("type", "dense_vector"); + b.field("dims", 128); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("type", "gpu"); + b.endObject(); + })); + var denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field"); + var indexOptions = denseVectorFieldMapper.fieldType().getIndexOptions(); + var name = 
indexOptions.type().name(); + assertThat(name, equalTo("gpu")); + // TODO: finish tests + } + + public void testGPUParsingFailureInRelease() { + Exception e = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper( + fieldMapping( + b -> b.field("type", "dense_vector").field("dims", dims).startObject("index_options").field("type", "gpu").endObject() + ) + ) + ); + assertThat(e.getMessage(), containsString("Unknown vector index options")); + } + + public void testKnnGPUVectorsFormat() throws IOException { + final int dims = randomIntBetween(64, 4096); + MapperService mapperService = createMapperService(fieldMapping(b -> { + b.field("type", "dense_vector"); + b.field("dims", dims); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("type", "gpu"); + b.endObject(); + })); + CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); + Codec codec = codecService.codec("default"); + KnnVectorsFormat knnVectorsFormat; + if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { + assertThat(codec, instanceOf(PerFieldMapperCodec.class)); + knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); + } else { + if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { + codec = deduplicateFieldInfosCodec.delegate(); + } + assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); + knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); + } + String expectedString = "GPUVectorsFormat()"; + assertEquals(expectedString, knnVectorsFormat.toString()); + } +} diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java similarity index 87% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormatTests.java 
rename to x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java index 7f7c5623740e9..837f43e7335cd 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/GPUVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java @@ -6,7 +6,7 @@ * your election, the "Elastic License 2.0", the "GNU Affero General Public * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.xpack.gpu.codec; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; @@ -16,7 +16,6 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; -import org.junit.Before; public class GPUVectorsFormatTests extends BaseKnnVectorsFormatTestCase { @@ -24,13 +23,12 @@ public class GPUVectorsFormatTests extends BaseKnnVectorsFormatTestCase { LogConfigurator.loadLog4jPlugins(); LogConfigurator.configureESLogging(); // native access requires logging to be initialized } - KnnVectorsFormat format; - @Before + static final Codec codec = TestUtil.alwaysKnnVectorsFormat(new GPUVectorsFormat()); + @Override - public void setUp() throws Exception { - format = new GPUVectorsFormat(); - super.setUp(); + protected Codec getCodec() { + return codec; } @Override @@ -48,11 +46,6 @@ public void testSearchWithVisitedLimit() { // TODO } - @Override - protected Codec getCodec() { - return TestUtil.alwaysKnnVectorsFormat(format); - } - @Override public void testAdvance() throws Exception { // TODO From 34b0ce83aaf9255c985dcd93d03022edc8c1a339 Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Thu, 19 Jun 2025 15:11:26 +0100 Subject: [PATCH 003/109] move GPUIndexType to it's own top-level class, and spotless --- .../metadata/IndexMetadataVerifierTests.java | 9 +- 
.../index/IndexSettingsTests.java | 9 +- .../vectors/DenseVectorFieldMapperTests.java | 9 +- ...bstractDenseVectorFieldMapperTestcase.java | 2 - .../elasticsearch/xpack/gpu/GPUIndexType.java | 92 +++++++++++++++++++ .../elasticsearch/xpack/gpu/GPUPlugin.java | 83 +---------------- 6 files changed, 114 insertions(+), 90 deletions(-) create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUIndexType.java diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java index cdb14e8cc9df4..b90e482609a07 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java @@ -299,8 +299,13 @@ private IndexMetadataVerifier getIndexMetadataVerifier() { Settings.EMPTY, null, xContentRegistry(), - new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER, - Collections.emptyMap()), + new MapperRegistry( + Collections.emptyMap(), + Collections.emptyMap(), + Collections.emptyMap(), + MapperPlugin.NOOP_FIELD_FILTER, + Collections.emptyMap() + ), IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, null, MapperMetrics.NOOP diff --git a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java index 6739c2c830e04..5122c9b725d8c 100644 --- a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java +++ b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java @@ -857,8 +857,13 @@ public void testIndexMapperDynamic() { Settings.EMPTY, null, xContentRegistry(), - new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER, - Collections.emptyMap()), + new MapperRegistry( + 
Collections.emptyMap(), + Collections.emptyMap(), + Collections.emptyMap(), + MapperPlugin.NOOP_FIELD_FILTER, + Collections.emptyMap() + ), IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, null, MapperMetrics.NOOP diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 46bb014441100..24a1387b78e65 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -789,10 +789,11 @@ public void testValidateOnBuild() { final MapperBuilderContext context = MapperBuilderContext.root(false, false); // Build a dense vector field mapper with float element type, which will trigger int8 HNSW index options - DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current(), - type -> DenseVectorFieldMapper.allBasicVectorIndexTypes().get(type)) - .elementType(ElementType.FLOAT) - .build(context); + DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder( + "test", + IndexVersion.current(), + type -> DenseVectorFieldMapper.allBasicVectorIndexTypes().get(type) + ).elementType(ElementType.FLOAT).build(context); // Change the element type to byte, which is incompatible with int8 HNSW index options DenseVectorFieldMapper.Builder builder = (DenseVectorFieldMapper.Builder) mapper.getMergeBuilder(); diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java index e8a630b4f5d9b..4a78b61c37d31 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java +++ 
b/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java @@ -9,7 +9,6 @@ package org.elasticsearch.index.mapper; - import org.apache.lucene.search.FieldExistsQuery; import org.apache.lucene.search.Query; import org.elasticsearch.index.IndexVersion; @@ -1286,7 +1285,6 @@ protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneD @Override public void testAggregatableConsistency() {} - @Override protected void assertFetchMany(MapperService mapperService, String field, Object value, String format, int count) throws IOException { assumeFalse("Dense vectors currently don't support multiple values in the same field", false); diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUIndexType.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUIndexType.java new file mode 100644 index 0000000000000..7f36c50f87968 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUIndexType.java @@ -0,0 +1,92 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.gpu; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.mapper.MappingParser; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +class GPUIndexType implements DenseVectorFieldMapper.VectorIndexType { + + static final String GPU_INDEX_TYPE_NAME = "gpu"; + + static final GPUIndexType GPU_INDEX_TYPE = new GPUIndexType(); + + @Override + public String name() { + return GPU_INDEX_TYPE_NAME; + } + + @Override + public DenseVectorFieldMapper.IndexOptions parseIndexOptions( + String fieldName, + Map indexOptionsMap, + IndexVersion indexVersion + ) { + MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); + return new GPUIndexOptions(); + } + + @Override + public boolean supportsElementType(DenseVectorFieldMapper.ElementType elementType) { + return elementType == DenseVectorFieldMapper.ElementType.FLOAT; + } + + @Override + public boolean supportsDimension(int dims) { + return true; + } + + @Override + public boolean isQuantized() { + return false; + } + + static class GPUIndexOptions extends DenseVectorFieldMapper.IndexOptions { + + GPUIndexOptions() { + super(GPU_INDEX_TYPE); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("type", type.name()); + builder.endObject(); + return builder; + } + + @Override + public KnnVectorsFormat getVectorsFormat(DenseVectorFieldMapper.ElementType elementType) { + assert elementType == DenseVectorFieldMapper.ElementType.FLOAT; + return new GPUVectorsFormat(); + } + + @Override + public boolean updatableTo(DenseVectorFieldMapper.IndexOptions update) { + return false; + } + + @Override + 
protected boolean doEquals(DenseVectorFieldMapper.IndexOptions o) { + return o instanceof GPUIndexOptions; + } + + @Override + protected int doHashCode() { + return Objects.hash(type); + } + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index 216e2639c2798..94e54871f4e90 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -6,19 +6,15 @@ */ package org.elasticsearch.xpack.gpu; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.elasticsearch.common.util.FeatureFlag; -import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.mapper.MappingParser; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; -import java.io.IOException; import java.util.Map; -import java.util.Objects; + +import static org.elasticsearch.xpack.gpu.GPUIndexType.GPU_INDEX_TYPE; +import static org.elasticsearch.xpack.gpu.GPUIndexType.GPU_INDEX_TYPE_NAME; public class GPUPlugin extends Plugin implements MapperPlugin { @@ -31,77 +27,4 @@ public Map getDenseVectorIndexTy return Map.of(); } } - - static final String GPU_INDEX_TYPE_NAME = "gpu"; - - static final GPUIndexType GPU_INDEX_TYPE = new GPUIndexType(); - - static class GPUIndexType implements DenseVectorFieldMapper.VectorIndexType { - - @Override - public String name() { - return GPU_INDEX_TYPE_NAME; - } - - @Override - public DenseVectorFieldMapper.IndexOptions parseIndexOptions( - String fieldName, - Map indexOptionsMap, - IndexVersion indexVersion - ) { - MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); - return new GPUIndexOptions(); - } - - 
@Override - public boolean supportsElementType(DenseVectorFieldMapper.ElementType elementType) { - return elementType == DenseVectorFieldMapper.ElementType.FLOAT; - } - - @Override - public boolean supportsDimension(int dims) { - return true; - } - - @Override - public boolean isQuantized() { - return false; - } - } - - static class GPUIndexOptions extends DenseVectorFieldMapper.IndexOptions { - - GPUIndexOptions() { - super(GPU_INDEX_TYPE); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); - builder.field("type", type.name()); - builder.endObject(); - return builder; - } - - @Override - public KnnVectorsFormat getVectorsFormat(DenseVectorFieldMapper.ElementType elementType) { - assert elementType == DenseVectorFieldMapper.ElementType.FLOAT; - return new GPUVectorsFormat(); - } - - @Override - public boolean updatableTo(DenseVectorFieldMapper.IndexOptions update) { - return false; - } - - @Override - protected boolean doEquals(DenseVectorFieldMapper.IndexOptions o) { - return o instanceof GPUIndexOptions; - } - - @Override - protected int doHashCode() { - return Objects.hash(type); - } - } } From d3db656044a21f76965f3243e2d132fae33e4333 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 19 Jun 2025 16:25:08 -0400 Subject: [PATCH 004/109] Add yml rest test that can be called: ./gradlew ":x-pack:plugin:gpu:yamlRestTest" --tests "org.elasticsearch.xpack.gpu.GPUYamlTestSuiteIT.test {p0=gpu/10_basic/*}" --- .../index/store/LuceneFilesExtensions.java | 5 ++- x-pack/plugin/gpu/build.gradle | 22 +++++---- .../gpu/codec/GPUVectorsFormatTests.java | 8 ++-- .../xpack/gpu/GPUYamlTestSuiteIT.java | 39 ++++++++++++++++ .../rest-api-spec/test/gpu/10_basic.yml | 45 +++++++++++++++++++ .../mapper/SemanticTextFieldMapper.java | 3 +- 6 files changed, 107 insertions(+), 15 deletions(-) create mode 100644 
x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUYamlTestSuiteIT.java create mode 100644 x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_basic.yml diff --git a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java index 65b3e85d7e7ba..1ac4922247357 100644 --- a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java +++ b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java @@ -87,7 +87,10 @@ public enum LuceneFilesExtensions { // ivf vectors format MIVF("mivf", "IVF Metadata", true, false), CENIVF("cenivf", "IVF Centroid Data", false, true), - CLIVF("clivf", "IVF Cluster Data", false, true); + CLIVF("clivf", "IVF Cluster Data", false, true), + // GPU vectors format + GPUIDX("gpuidx", "GPU Vector Index", false, true), + MGPU("mgpu", "GPU Vector Metadata", true, false); /** * Allow plugin developers of custom codecs to opt out of the assertion in {@link #fromExtension} diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle index 686ac2254492e..f5880c83209d0 100644 --- a/x-pack/plugin/gpu/build.gradle +++ b/x-pack/plugin/gpu/build.gradle @@ -1,21 +1,27 @@ apply plugin: 'elasticsearch.internal-es-plugin' apply plugin: 'elasticsearch.internal-cluster-test' +apply plugin: 'elasticsearch.internal-yaml-rest-test' + esplugin { name = 'gpu' - description = 'A plugin for GPU functionality' + description = 'A plugin for doing vector search in GPU' classname = 'org.elasticsearch.xpack.gpu.GPUPlugin' + extendedPlugins = ['x-pack-core'] } base { archivesName = 'x-pack-gpu' } dependencies { -// compileOnly project(path: xpackModule('core')) -// testImplementation(testArtifact(project(xpackModule('core')))) - + compileOnly project(path: xpackModule('core')) compileOnly project(':server') - - implementation "org.apache.lucene:lucene-core:${versions.lucene}" - - 
testImplementation project(':test:framework') + testImplementation(testArtifact(project(xpackModule('core')))) + testImplementation(testArtifact(project(':server'))) + clusterModules project(xpackModule('gpu')) +} +tasks.named("yamlRestTest") { + usesDefaultDistribution("uses gpu plugin") +} +artifacts { + restXpackTests(new File(projectDir, "src/yamlRestTest/resources/rest-api-spec/test")) } diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java index 837f43e7335cd..6de297fdc89ee 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java @@ -1,10 +1,8 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. */ package org.elasticsearch.xpack.gpu.codec; diff --git a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUYamlTestSuiteIT.java b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUYamlTestSuiteIT.java new file mode 100644 index 0000000000000..eee4c704c80ea --- /dev/null +++ b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUYamlTestSuiteIT.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +package org.elasticsearch.xpack.gpu; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; +import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; +import org.junit.ClassRule; + +public class GPUYamlTestSuiteIT extends ESClientYamlSuiteTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = ElasticsearchCluster.local() + .nodes(1) + .module("gpu") + .setting("xpack.license.self_generated.type", "basic") + .setting("xpack.security.enabled", "false") + .build(); + + public GPUYamlTestSuiteIT(final ClientYamlTestCandidate testCandidate) { + super(testCandidate); + } + + @ParametersFactory + public static Iterable parameters() throws Exception { + return ESClientYamlSuiteTestCase.createParameters(); + } + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_basic.yml b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_basic.yml new file mode 100644 index 0000000000000..82ced7dfc1da3 --- /dev/null +++ b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_basic.yml @@ -0,0 +1,45 @@ +--- +"Test GPU vector operations": + - do: + indices.create: + index: gpu_vectors + body: + mappings: + properties: + embedding: + type: dense_vector + dims: 128 + similarity: l2_norm + index_options: + type: gpu + + - do: + bulk: + index: gpu_vectors + refresh: true + body: + - index: + _id: "1" + - text: "First document" + embedding: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 
0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] + - index: + _id: "2" + - text: "Second document" + embedding: [0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85] + - index: + _id: "3" + - text: "Third document" + embedding: [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 
0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + - match: { errors: false } + + - do: + search: + index: gpu_vectors + body: + knn: + field: embedding + query_vector: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] + k: 3 + + - match: { hits.total.value: 3 } + - match: { hits.hits.0._id: "1" } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 92337c8e7fc8d..032553bb61573 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -1081,7 +1081,8 @@ private static Mapper.Builder createEmbeddingsField( case TEXT_EMBEDDING -> { DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder( CHUNKED_EMBEDDINGS_FIELD, - indexVersionCreated + indexVersionCreated, + type -> DenseVectorFieldMapper.allBasicVectorIndexTypes().get(type) ); SimilarityMeasure similarity = modelSettings.similarity(); From 82f1ca5ebe0c49ac5e97ac28e5ddc9d4f8a3ef6e Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Fri, 20 Jun 2025 08:36:40 +0100 Subject: [PATCH 005/109] Reload KnnVectorsFormat to allow plugins to declare a custom knn 
vectors format. --- .../src/main/java/org/elasticsearch/plugins/PluginsService.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/plugins/PluginsService.java b/server/src/main/java/org/elasticsearch/plugins/PluginsService.java index 78a8650a5e920..348cd7e3cd4b2 100644 --- a/server/src/main/java/org/elasticsearch/plugins/PluginsService.java +++ b/server/src/main/java/org/elasticsearch/plugins/PluginsService.java @@ -13,6 +13,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.admin.cluster.node.info.PluginsAndModules; @@ -477,6 +478,7 @@ static void reloadLuceneSPI(ClassLoader loader) { // Codecs: PostingsFormat.reloadPostingsFormats(loader); DocValuesFormat.reloadDocValuesFormats(loader); + KnnVectorsFormat.reloadKnnVectorsFormat(loader); Codec.reloadCodecs(loader); } From 5207bed98f3c1d2e35ebb960fb6e80bac7195625 Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Fri, 20 Jun 2025 10:05:42 +0100 Subject: [PATCH 006/109] Add dependency on cuVS --- gradle/verification-metadata.xml | 5 + x-pack/plugin/gpu/README | 51 +++++ x-pack/plugin/gpu/build.gradle | 5 + .../plugin/gpu/licenses/cuvs-java-LICENSE.txt | 201 ++++++++++++++++++ .../plugin/gpu/licenses/cuvs-java-NOTICE.txt | 0 .../plugin/gpu/src/main/java/module-info.java | 1 + .../xpack/gpu/codec/GPUDepsTest.java | 25 +++ .../codec/GPUDenseVectorFieldMapperTests.java | 1 + 8 files changed, 289 insertions(+) create mode 100644 x-pack/plugin/gpu/README create mode 100644 x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt create mode 100644 x-pack/plugin/gpu/licenses/cuvs-java-NOTICE.txt create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUDepsTest.java diff --git 
a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 3605e71501873..ff9dc16081a38 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -1128,6 +1128,11 @@ + + + + + diff --git a/x-pack/plugin/gpu/README b/x-pack/plugin/gpu/README new file mode 100644 index 0000000000000..4ec5a550434c0 --- /dev/null +++ b/x-pack/plugin/gpu/README @@ -0,0 +1,51 @@ + + +For local development on mac, where cuVS is not easily built, one can +minimally get an IDEA environment with compile-time support by building +a minimal cuvs-java "api" jar. Tests cannot be run. + +1. Clone cuvs + git clone https://github.com/rapidsai/cuvs/; cd cuvs + git checkout branch-25.08 + cd java/cuvs-java + +2. Remove the implementation compile target from the pom.xml, e.g. + +$ git diff +diff --git a/java/cuvs-java/pom.xml b/java/cuvs-java/pom.xml +index 15e0193a..446f2b61 100644 +--- a/java/cuvs-java/pom.xml ++++ b/java/cuvs-java/pom.xml +@@ -123,20 +123,6 @@ + + + +- +- compile-java-22 +- compile +- +- compile +- +- +- 22 +- +- ${project.basedir}/src/main/java22 +- +- true +- +- + + + + +3. 
Build and install into local maven repository + + export JAVA_HOME=/Users/chegar/binaries/jdk-22.0.2.jdk/Contents/Home/ + cd java/cuvs-java + mv install + Installs into maven local repository, e.g: + /Users/chegar/.m2/repository/com/nvidia/cuvs/cuvs-java/25.08.0/ + + Might need to modify gradle metadata sha in gradle/verification-metadata.xml + + diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle index f5880c83209d0..48036e8dae137 100644 --- a/x-pack/plugin/gpu/build.gradle +++ b/x-pack/plugin/gpu/build.gradle @@ -12,9 +12,14 @@ base { archivesName = 'x-pack-gpu' } +repositories { + mavenLocal() +} + dependencies { compileOnly project(path: xpackModule('core')) compileOnly project(':server') + implementation 'com.nvidia.cuvs:cuvs-java:25.08.0' testImplementation(testArtifact(project(xpackModule('core')))) testImplementation(testArtifact(project(':server'))) clusterModules project(xpackModule('gpu')) diff --git a/x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt b/x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt new file mode 100644 index 0000000000000..1a89b9054d669 --- /dev/null +++ b/x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2020 NVIDIA Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/x-pack/plugin/gpu/licenses/cuvs-java-NOTICE.txt b/x-pack/plugin/gpu/licenses/cuvs-java-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/x-pack/plugin/gpu/src/main/java/module-info.java b/x-pack/plugin/gpu/src/main/java/module-info.java index 67c76e0507a56..1abd0ec84fe6b 100644 --- a/x-pack/plugin/gpu/src/main/java/module-info.java +++ b/x-pack/plugin/gpu/src/main/java/module-info.java @@ -11,6 +11,7 @@ requires org.elasticsearch.xcontent; requires org.elasticsearch.server; requires org.elasticsearch.base; + requires com.nvidia.cuvs; provides org.apache.lucene.codecs.KnnVectorsFormat with org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUDepsTest.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUDepsTest.java new file mode 100644 index 0000000000000..2185220c7e3c5 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUDepsTest.java @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.LibraryException; + +// Delete me just a test +class GPUDepsTest { + + void foo() { + try { + var resources = CuVSResources.create(); + } catch (LibraryException ex) { + throw ex; + } catch (Throwable e) { + throw new RuntimeException(e); + } + } +} diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java index 550e26ebf5ad3..f3504112f31aa 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java @@ -62,6 +62,7 @@ public void testGPUParsing() throws IOException { } public void testGPUParsingFailureInRelease() { + assumeTrue("feature flag [gpu_format] must be enabled", GPU_FORMAT.isEnabled() == false); Exception e = expectThrows( MapperParsingException.class, () -> createDocumentMapper( From d0ee7e1c335fe9879e7378931c7dd1fd20231ce8 Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Fri, 20 Jun 2025 10:39:11 +0100 Subject: [PATCH 007/109] Add a supported check for cuVS - can be used in more tests eventually --- .../upgrades/FullClusterRestartIT.java | 21 +++++++++++++--- .../plugin/gpu/src/main/java/module-info.java | 1 + .../xpack/gpu/codec/GPUDepsTest.java | 25 ------------------- .../xpack/gpu/codec/GPUVectorsFormat.java | 22 ++++++++++++++++ .../gpu/codec/GPUVectorsFormatTests.java | 6 +++++ 5 files changed, 47 insertions(+), 28 deletions(-) delete mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUDepsTest.java diff --git a/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java 
b/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java index cbe383a7da550..fa242a4e14102 100644 --- a/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java +++ b/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java @@ -1004,7 +1004,11 @@ public void testSnapshotRestore() throws IOException { if (minimumIndexVersion().before(IndexVersions.V_8_0_0) && randomBoolean()) { settings.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), randomBoolean()); } - createIndex(index, settings.build()); + System.out.println("HEGO settings=" + settings); + var resp = createIndex(index, settings.build()); + // Map snapResponse = entityAsMap(resp); + System.out.println("HEGO createIndex response=" + resp); + indexRandomDocuments(count, true, true, randomBoolean(), i -> jsonBuilder().startObject().field("field", "value").endObject()); } else { count = countOfIndexedRandomDocuments(); @@ -1094,9 +1098,18 @@ public void testSnapshotRestore() throws IOException { (builder, params) -> builder.field("indices", index) ); createSnapshot.addParameter("wait_for_completion", "true"); - client().performRequest(createSnapshot); + System.out.println("HEGO createSnapshot=" + createSnapshot); + + var resp = client().performRequest(createSnapshot); + + System.out.println("HEGO createSnapshot resp=" + resp); + + System.out.println("HEGO Build.current().version()=" + Build.current().version()); + System.out.println("HEGO IndexVersion.current()=" + IndexVersion.current()); + System.out.println("HEGO getOldClusterVersion=" + getOldClusterVersion()); + System.out.println("HEGO getOldClusterIndexVersion=" + getOldClusterIndexVersion()); - checkSnapshot("old_snap", count, getOldClusterVersion(), getOldClusterIndexVersion()); + checkSnapshot("old_snap", count, getOldClusterVersion(), getOldClusterIndexVersion()); // HERE if (false == isRunningAgainstOldCluster()) { 
checkSnapshot("new_snap", count, Build.current().version(), IndexVersion.current()); } @@ -1282,6 +1295,7 @@ private void checkSnapshot(String snapshotName, int count, String tookOnVersion, // Check the snapshot metadata, especially the version Request listSnapshotRequest = new Request("GET", "/_snapshot/repo/" + snapshotName); Map snapResponse = entityAsMap(client().performRequest(listSnapshotRequest)); + System.out.println("HEGO listSnapshotRequest response=" + snapResponse); assertEquals(singletonList(snapshotName), XContentMapValues.extractValue("snapshots.snapshot", snapResponse)); assertEquals(singletonList("SUCCESS"), XContentMapValues.extractValue("snapshots.state", snapResponse)); @@ -1290,6 +1304,7 @@ private void checkSnapshot(String snapshotName, int count, String tookOnVersion, // which could affect the top range of the index release version String firstReleaseVersion = tookOnIndexVersion.toReleaseVersion().split("-")[0]; assertThat( + // HERE (Iterable) XContentMapValues.extractValue("snapshots.version", snapResponse), anyOf( contains(tookOnVersion), diff --git a/x-pack/plugin/gpu/src/main/java/module-info.java b/x-pack/plugin/gpu/src/main/java/module-info.java index 1abd0ec84fe6b..d3c8616688bf9 100644 --- a/x-pack/plugin/gpu/src/main/java/module-info.java +++ b/x-pack/plugin/gpu/src/main/java/module-info.java @@ -7,6 +7,7 @@ /** Provides GPU-accelerated support for vector search. */ module org.elasticsearch.gpu { + requires org.elasticsearch.logging; requires org.apache.lucene.core; requires org.elasticsearch.xcontent; requires org.elasticsearch.server; diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUDepsTest.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUDepsTest.java deleted file mode 100644 index 2185220c7e3c5..0000000000000 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUDepsTest.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.gpu.codec; - -import com.nvidia.cuvs.CuVSResources; -import com.nvidia.cuvs.LibraryException; - -// Delete me just a test -class GPUDepsTest { - - void foo() { - try { - var resources = CuVSResources.create(); - } catch (LibraryException ex) { - throw ex; - } catch (Throwable e) { - throw new RuntimeException(e); - } - } -} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java index b512ec7582592..be28f640a7ad4 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java @@ -7,6 +7,8 @@ package org.elasticsearch.xpack.gpu.codec; +import com.nvidia.cuvs.CuVSResources; + import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; @@ -16,6 +18,8 @@ import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import java.io.IOException; @@ -25,6 +29,8 @@ */ public class GPUVectorsFormat extends KnnVectorsFormat { + private static final Logger LOG = LogManager.getLogger(GPUVectorsFormat.class); + public static final String NAME = "GPUVectorsFormat"; public static final String GPU_IDX_EXTENSION = "gpuidx"; public static final String GPU_META_EXTENSION = "mgpu"; @@ -69,4 +75,20 @@ static GPUVectorsReader getGPUReader(KnnVectorsReader vectorsReader, String fiel } return null; } 
+ + /** Tells whether the platform supports cuvs. */ + public static boolean supported() { + try (var resources = CuVSResources.create()) { + return true; + } catch (UnsupportedOperationException uoe) { + var msg = uoe.getMessage() == null ? "" : ": " + uoe.getMessage(); + LOG.warn("cuvs is not supported on this platform or java version" + msg); + } catch (Throwable t) { + if (t instanceof ExceptionInInitializerError ex) { + t = ex.getCause(); + } + LOG.warn("Exception occurred during creation of cuvs resources. " + t); + } + return false; + } } diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java index 6de297fdc89ee..15b75a597a994 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java @@ -14,6 +14,7 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; +import org.junit.BeforeClass; public class GPUVectorsFormatTests extends BaseKnnVectorsFormatTestCase { @@ -22,6 +23,11 @@ public class GPUVectorsFormatTests extends BaseKnnVectorsFormatTestCase { LogConfigurator.configureESLogging(); // native access requires logging to be initialized } + @BeforeClass + public static void beforeClass() { + assumeTrue("cuvs not supported", GPUVectorsFormat.supported()); + } + static final Codec codec = TestUtil.alwaysKnnVectorsFormat(new GPUVectorsFormat()); @Override From 9b6fde255c71acabf064dce01f151752f33804c2 Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Fri, 20 Jun 2025 10:39:53 +0100 Subject: [PATCH 008/109] revert accidental commit --- .../upgrades/FullClusterRestartIT.java | 21 +++---------------- 1 file changed, 3 insertions(+), 18 deletions(-) diff --git 
a/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java b/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java index fa242a4e14102..cbe383a7da550 100644 --- a/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java +++ b/qa/full-cluster-restart/src/javaRestTest/java/org/elasticsearch/upgrades/FullClusterRestartIT.java @@ -1004,11 +1004,7 @@ public void testSnapshotRestore() throws IOException { if (minimumIndexVersion().before(IndexVersions.V_8_0_0) && randomBoolean()) { settings.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), randomBoolean()); } - System.out.println("HEGO settings=" + settings); - var resp = createIndex(index, settings.build()); - // Map snapResponse = entityAsMap(resp); - System.out.println("HEGO createIndex response=" + resp); - + createIndex(index, settings.build()); indexRandomDocuments(count, true, true, randomBoolean(), i -> jsonBuilder().startObject().field("field", "value").endObject()); } else { count = countOfIndexedRandomDocuments(); @@ -1098,18 +1094,9 @@ public void testSnapshotRestore() throws IOException { (builder, params) -> builder.field("indices", index) ); createSnapshot.addParameter("wait_for_completion", "true"); - System.out.println("HEGO createSnapshot=" + createSnapshot); - - var resp = client().performRequest(createSnapshot); - - System.out.println("HEGO createSnapshot resp=" + resp); - - System.out.println("HEGO Build.current().version()=" + Build.current().version()); - System.out.println("HEGO IndexVersion.current()=" + IndexVersion.current()); - System.out.println("HEGO getOldClusterVersion=" + getOldClusterVersion()); - System.out.println("HEGO getOldClusterIndexVersion=" + getOldClusterIndexVersion()); + client().performRequest(createSnapshot); - checkSnapshot("old_snap", count, getOldClusterVersion(), getOldClusterIndexVersion()); // HERE + checkSnapshot("old_snap", count, 
getOldClusterVersion(), getOldClusterIndexVersion()); if (false == isRunningAgainstOldCluster()) { checkSnapshot("new_snap", count, Build.current().version(), IndexVersion.current()); } @@ -1295,7 +1282,6 @@ private void checkSnapshot(String snapshotName, int count, String tookOnVersion, // Check the snapshot metadata, especially the version Request listSnapshotRequest = new Request("GET", "/_snapshot/repo/" + snapshotName); Map snapResponse = entityAsMap(client().performRequest(listSnapshotRequest)); - System.out.println("HEGO listSnapshotRequest response=" + snapResponse); assertEquals(singletonList(snapshotName), XContentMapValues.extractValue("snapshots.snapshot", snapResponse)); assertEquals(singletonList("SUCCESS"), XContentMapValues.extractValue("snapshots.state", snapResponse)); @@ -1304,7 +1290,6 @@ private void checkSnapshot(String snapshotName, int count, String tookOnVersion, // which could affect the top range of the index release version String firstReleaseVersion = tookOnIndexVersion.toReleaseVersion().split("-")[0]; assertThat( - // HERE (Iterable) XContentMapValues.extractValue("snapshots.version", snapResponse), anyOf( contains(tookOnVersion), From 2ac22b3d96a5526e0325b2b22b106a675210294e Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 20 Jun 2025 14:17:44 -0400 Subject: [PATCH 009/109] Small edits --- x-pack/plugin/gpu/README | 2 +- ...{GPUYamlTestSuiteIT.java => GPUClientYamlTestSuiteIT.java} | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/{GPUYamlTestSuiteIT.java => GPUClientYamlTestSuiteIT.java} (88%) diff --git a/x-pack/plugin/gpu/README b/x-pack/plugin/gpu/README index 4ec5a550434c0..a56d18aef7ec5 100644 --- a/x-pack/plugin/gpu/README +++ b/x-pack/plugin/gpu/README @@ -42,7 +42,7 @@ index 15e0193a..446f2b61 100644 export JAVA_HOME=/Users/chegar/binaries/jdk-22.0.2.jdk/Contents/Home/ cd java/cuvs-java - mv install + mvn install Installs into maven 
local repository, e.g: /Users/chegar/.m2/repository/com/nvidia/cuvs/cuvs-java/25.08.0/ diff --git a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUYamlTestSuiteIT.java b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java similarity index 88% rename from x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUYamlTestSuiteIT.java rename to x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java index eee4c704c80ea..d975ec84423a6 100644 --- a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUYamlTestSuiteIT.java +++ b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java @@ -13,7 +13,7 @@ import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; import org.junit.ClassRule; -public class GPUYamlTestSuiteIT extends ESClientYamlSuiteTestCase { +public class GPUClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { @ClassRule public static ElasticsearchCluster cluster = ElasticsearchCluster.local() @@ -23,7 +23,7 @@ public class GPUYamlTestSuiteIT extends ESClientYamlSuiteTestCase { .setting("xpack.security.enabled", "false") .build(); - public GPUYamlTestSuiteIT(final ClientYamlTestCandidate testCandidate) { + public GPUClientYamlTestSuiteIT(final ClientYamlTestCandidate testCandidate) { super(testCandidate); } From 8cee75e0714887c1369e40ce75fce28f3042a1ea Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Mon, 23 Jun 2025 15:55:59 -0400 Subject: [PATCH 010/109] Build cagra index (iter1) --- .../xpack/gpu/codec/GPUVectorsFormat.java | 11 +- .../xpack/gpu/codec/GPUVectorsWriter.java | 101 +++++++++++++++--- .../plugin-metadata/entitlement-policy.yaml | 2 + .../gpu/codec/GPUVectorsFormatTests.java | 2 +- .../xpack/gpu/GPUClientYamlTestSuiteIT.java | 2 +- 5 files changed, 99 insertions(+), 19 deletions(-) create mode 100644 
x-pack/plugin/gpu/src/main/plugin-metadata/entitlement-policy.yaml diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java index be28f640a7ad4..51664cee19ff9 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java @@ -77,18 +77,19 @@ static GPUVectorsReader getGPUReader(KnnVectorsReader vectorsReader, String fiel } /** Tells whether the platform supports cuvs. */ - public static boolean supported() { - try (var resources = CuVSResources.create()) { - return true; + public static CuVSResources cuVSResourcesOrNull() { + try { + var resources = CuVSResources.create(); + return resources; } catch (UnsupportedOperationException uoe) { var msg = uoe.getMessage() == null ? "" : ": " + uoe.getMessage(); - LOG.warn("cuvs is not supported on this platform or java version" + msg); + LOG.warn("GPU based vector search is not supported on this platform or java version" + msg); } catch (Throwable t) { if (t instanceof ExceptionInInitializerError ex) { t = ex.getCause(); } LOG.warn("Exception occurred during creation of cuvs resources. 
" + t); } - return false; + return null; } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java index cbca1480c3d3a..d2e3518734cb8 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java @@ -7,6 +7,10 @@ package org.elasticsearch.xpack.gpu.codec; +import com.nvidia.cuvs.CagraIndex; +import com.nvidia.cuvs.CagraIndexParams; +import com.nvidia.cuvs.CuVSResources; + import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnFieldVectorsWriter; import org.apache.lucene.codecs.KnnVectorsWriter; @@ -15,33 +19,46 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.IndexOutput; +import org.elasticsearch.common.lucene.store.IndexOutputOutputStream; import org.elasticsearch.core.IOUtils; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import java.io.IOException; import java.util.ArrayList; import java.util.List; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; /** * Writer for GPU-accelerated vectors. 
*/ public class GPUVectorsWriter extends KnnVectorsWriter { + private static final Logger logger = LogManager.getLogger(GPUVectorsWriter.class); + // 2 for now based on https://github.com/rapidsai/cuvs/issues/666, but can be increased later + private static final int MIN_NUM_VECTORS_FOR_GPU_BUILD = 2; private final List fieldWriters = new ArrayList<>(); private final IndexOutput gpuIdx; private final IndexOutput gpuMeta; private final FlatVectorsWriter rawVectorDelegate; private final SegmentWriteState segmentWriteState; + private final CuVSResources cuVSResources; @SuppressWarnings("this-escape") public GPUVectorsWriter(SegmentWriteState state, FlatVectorsWriter rawVectorDelegate) throws IOException { + this.cuVSResources = GPUVectorsFormat.cuVSResourcesOrNull(); + if (cuVSResources == null) { + throw new IllegalArgumentException("GPU based vector search is not supported on this platform or java version"); + } this.segmentWriteState = state; this.rawVectorDelegate = rawVectorDelegate; final String metaFileName = IndexFileNames.segmentFileName( @@ -95,15 +112,63 @@ public final KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOExc @Override public final void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { rawVectorDelegate.flush(maxDoc, sortMap); + // TODO: implement the case when sortMap != null + for (FieldWriter fieldWriter : fieldWriters) { - // TODO: Implement GPU-specific vector merging instead of bogus implementation + // TODO: can we use MemorySegment instead of passing array of vectors + float[][] vectors = fieldWriter.delegate.getVectors().toArray(float[][]::new); long dataOffset = gpuIdx.alignFilePointer(Float.BYTES); - var vectors = fieldWriter.delegate.getVectors(); - for (int i = 0; i < vectors.size(); i++) { - gpuIdx.writeVInt(0); + try { + buildAndwriteGPUIndex(fieldWriter.fieldInfo.getVectorSimilarityFunction(), vectors); + long dataLength = gpuIdx.getFilePointer() - dataOffset; + writeMeta(fieldWriter.fieldInfo, 
dataOffset, dataLength); + } catch (IOException e) { + throw e; + } catch (Throwable t) { + throw new IOException("Failed to write GPU index: ", t); + } + } + } + + private void buildAndwriteGPUIndex(VectorSimilarityFunction similarityFunction, float[][] vectors) throws Throwable { + // TODO: should we Lucene HNSW index write here + if (vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD) { + if (logger.isDebugEnabled()) { + logger.debug("Skip building carga index; vectors length {} < {}", vectors.length, MIN_NUM_VECTORS_FOR_GPU_BUILD); + } + return; + } + + CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) { + case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded; + case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct; + case COSINE -> CagraIndexParams.CuvsDistanceType.CosineExpanded; + }; + + // TODO: expose cagra index params of intermediate graph degree, graph degre, algorithm, NNDescentNumIterations + CagraIndexParams params = new CagraIndexParams.Builder().withNumWriterThreads(1) // TODO: how many CPU threads we can use? 
+ .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.NN_DESCENT) + .withMetric(distanceType) + .build(); + + // build index on GPU + long startTime = System.nanoTime(); + var index = CagraIndex.newBuilder(cuVSResources).withDataset(vectors).withIndexParams(params).build(); + if (logger.isDebugEnabled()) { + logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, vectors.length); + } + + // TODO: do serialization through MemorySegment instead of a temp file + // serialize index for CPU consumption + startTime = System.nanoTime(); + var gpuIndexOutputStream = new IndexOutputOutputStream(gpuIdx); + try { + index.serialize(gpuIndexOutputStream); + if (logger.isDebugEnabled()) { + logger.debug("Carga index serialized in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); } - long dataLength = gpuIdx.getFilePointer() - dataOffset; - writeMeta(fieldWriter.fieldInfo, dataOffset, dataLength); + } finally { + index.destroyIndex(); } } @@ -111,14 +176,25 @@ public final void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { public final void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { if (fieldInfo.getVectorEncoding().equals(VectorEncoding.FLOAT32)) { rawVectorDelegate.mergeOneField(fieldInfo, mergeState); - // TODO: Implement GPU-specific vector merging instead of bogus implementation - FloatVectorValues floatVectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); + FloatVectorValues vectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); + // TODO: more efficient way to pass merged vector values to gpuIndex construction + KnnVectorValues.DocIndexIterator iter = vectorValues.iterator(); + List vectorList = new ArrayList<>(); + for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) { + vectorList.add(vectorValues.vectorValue(iter.index())); + } + float[][] 
vectors = vectorList.toArray(new float[0][]); + long dataOffset = gpuIdx.alignFilePointer(Float.BYTES); - for (int i = 0; i < floatVectorValues.size(); i++) { - gpuIdx.writeVInt(0); + try { + buildAndwriteGPUIndex(fieldInfo.getVectorSimilarityFunction(), vectors); + long dataLength = gpuIdx.getFilePointer() - dataOffset; + writeMeta(fieldInfo, dataOffset, dataLength); + } catch (IOException e) { + throw e; + } catch (Throwable t) { + throw new IOException("Failed to write GPU index: ", t); } - long dataLength = gpuIdx.getFilePointer() - dataOffset; - writeMeta(fieldInfo, dataOffset, dataLength); } else { rawVectorDelegate.mergeOneField(fieldInfo, mergeState); } @@ -157,6 +233,7 @@ public final void finish() throws IOException { @Override public final void close() throws IOException { IOUtils.close(rawVectorDelegate, gpuMeta, gpuIdx); + cuVSResources.close(); } @Override diff --git a/x-pack/plugin/gpu/src/main/plugin-metadata/entitlement-policy.yaml b/x-pack/plugin/gpu/src/main/plugin-metadata/entitlement-policy.yaml new file mode 100644 index 0000000000000..d0c571b8538b2 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/plugin-metadata/entitlement-policy.yaml @@ -0,0 +1,2 @@ +com.nvidia.cuvs: + - load_native_libraries diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java index 15b75a597a994..d49fda94b1915 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java @@ -25,7 +25,7 @@ public class GPUVectorsFormatTests extends BaseKnnVectorsFormatTestCase { @BeforeClass public static void beforeClass() { - assumeTrue("cuvs not supported", GPUVectorsFormat.supported()); + assumeTrue("cuvs not supported", GPUVectorsFormat.cuVSResourcesOrNull() != null); } static final Codec codec = 
TestUtil.alwaysKnnVectorsFormat(new GPUVectorsFormat()); diff --git a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java index d975ec84423a6..0f4a7a059b6d4 100644 --- a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java +++ b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java @@ -19,7 +19,7 @@ public class GPUClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .nodes(1) .module("gpu") - .setting("xpack.license.self_generated.type", "basic") + .setting("xpack.license.self_generated.type", "trial") .setting("xpack.security.enabled", "false") .build(); From 3f05ba01ca8fd15bb8d8969599a26a180cbd474d Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 3 Jul 2025 14:01:51 -0400 Subject: [PATCH 011/109] Build cagra index (iter2) --- .../xpack/gpu/codec/GPUVectorsWriter.java | 91 ++++++++++++++++++- 1 file changed, 87 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java index d2e3518734cb8..44b24c76cabba 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java @@ -25,9 +25,11 @@ import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.elasticsearch.common.lucene.store.IndexOutputOutputStream; import org.elasticsearch.core.IOUtils; +import 
org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -130,6 +132,7 @@ public final void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { } } + @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") private void buildAndwriteGPUIndex(VectorSimilarityFunction similarityFunction, float[][] vectors) throws Throwable { // TODO: should we Lucene HNSW index write here if (vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD) { @@ -139,6 +142,7 @@ private void buildAndwriteGPUIndex(VectorSimilarityFunction similarityFunction, return; } + int dimension = vectors[0].length; CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) { case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded; case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct; @@ -159,16 +163,95 @@ private void buildAndwriteGPUIndex(VectorSimilarityFunction similarityFunction, } // TODO: do serialization through MemorySegment instead of a temp file - // serialize index for CPU consumption + // serialize index for CPU consumption to hnwslib format startTime = System.nanoTime(); - var gpuIndexOutputStream = new IndexOutputOutputStream(gpuIdx); + IndexOutput tempCagraHNSW = null; + boolean success = false; try { - index.serialize(gpuIndexOutputStream); + tempCagraHNSW = segmentWriteState.directory.createTempOutput(gpuIdx.getName(), "cagra_hnws_temp", segmentWriteState.context); + var tempCagraHNSWOutputStream = new IndexOutputOutputStream(tempCagraHNSW); + index.serializeToHNSW(tempCagraHNSWOutputStream); + success = true; if (logger.isDebugEnabled()) { - logger.debug("Carga index serialized in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); + logger.debug("Carga index serialized to hnswlib format in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); } } finally { index.destroyIndex(); + if (success) { + 
IOUtils.close(tempCagraHNSW); + } else { + IOUtils.closeWhileHandlingException(tempCagraHNSW); + if (tempCagraHNSW != null) { + org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName()); + } + } + } + + // convert hnswlib format to Lucene HNSW format + startTime = System.nanoTime(); + success = false; + IndexInput tempCagraHNSWInput = null; + try { + tempCagraHNSWInput = segmentWriteState.directory.openInput(tempCagraHNSW.getName(), segmentWriteState.context); + // read the metadata from the hnlswlib format + // some of them are not used in Lucene HNSW format + tempCagraHNSWInput.readLong(); // offSetLevel0 + long maxElementCount = tempCagraHNSWInput.readLong(); + tempCagraHNSWInput.readLong(); // currElementCount + long sizeDataPerElement = tempCagraHNSWInput.readLong(); + long labelOffset = tempCagraHNSWInput.readLong(); + long dataOffset = tempCagraHNSWInput.readLong(); + int maxLevel = tempCagraHNSWInput.readInt(); + tempCagraHNSWInput.readInt(); // entryPointNode + tempCagraHNSWInput.readLong(); // maxM + long maxM0 = tempCagraHNSWInput.readLong(); // number of graph connections + tempCagraHNSWInput.readLong(); // M + tempCagraHNSWInput.readLong(); // mult + tempCagraHNSWInput.readLong(); // efConstruction + + assert (maxLevel == 1) : "Cagra index is flat, maxLevel must be: 1, got: " + maxLevel; + int maxGraphDegree = (int) maxM0; + int[] connections = new int[maxGraphDegree]; + int dimensionCalculated = (int) ((labelOffset - dataOffset) / Float.BYTES); + assert (dimension == dimensionCalculated) + : "Cagra index vector dimension must be: " + dimension + ", got: " + dimensionCalculated; + + // read graph from the cagra_hnswlib index and write it to the Lucene HNSW format + gpuIdx.writeInt((int) maxElementCount); + gpuIdx.writeInt((int) maxM0); + for (int i = 0; i < maxElementCount; i++) { + // read from the cagra_hnswlib index + int graphDegree = tempCagraHNSWInput.readInt(); + assert (graphDegree == 
maxGraphDegree) + : "In Cagra graph all nodes must have the same number of connections : " + maxGraphDegree + ", got" + graphDegree; + for (int j = 0; j < graphDegree; j++) { + connections[j] = tempCagraHNSWInput.readInt(); + } + // Skip over the vector data + tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + dimension * Float.BYTES); + // Skip over the label/id + tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + Long.BYTES); + + // write graph + gpuIdx.writeVInt(graphDegree); + for (int neighbor : connections) { + gpuIdx.writeVInt(neighbor); + } + } + + success = true; + if (logger.isDebugEnabled()) { + logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); + } + } finally { + if (success) { + IOUtils.close(tempCagraHNSWInput); + } else { + IOUtils.closeWhileHandlingException(tempCagraHNSWInput); + } + if (tempCagraHNSW != null) { + org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName()); + } } } From 2ac9d8b29f4aefdc82e19a5e5fe1005cfcde8bf5 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Sun, 6 Jul 2025 10:20:22 -0400 Subject: [PATCH 012/109] Write Cagra index to Lucene HNSW format First save Cagra index to hnswlib format on disk. Read this disk file to serialize to Lucene HNSW format. 
--- .../gpu/codec/GPUToHNSWVectorsWriter.java | 554 ++++++++++++++++++ .../xpack/gpu/codec/GPUVectorsFormat.java | 39 +- .../xpack/gpu/codec/GPUVectorsReader.java | 218 ------- .../xpack/gpu/codec/GPUVectorsWriter.java | 328 ----------- .../gpu/codec/GPUVectorsFormatTests.java | 47 +- 5 files changed, 617 insertions(+), 569 deletions(-) create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java delete mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsReader.java delete mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java new file mode 100644 index 0000000000000..6b83934783cc0 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -0,0 +1,554 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CagraIndex; +import com.nvidia.cuvs.CagraIndexParams; +import com.nvidia.cuvs.CuVSResources; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.KnnFieldVectorsWriter; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; +import org.apache.lucene.index.DocsWithFieldSet; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.KnnVectorValues; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.Sorter; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.hnsw.HnswGraph; +import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator; +import org.apache.lucene.util.packed.DirectMonotonicWriter; +import org.elasticsearch.common.lucene.store.IndexOutputOutputStream; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; +import static org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.LUCENE99_HNSW_META_CODEC_NAME; +import static org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.LUCENE99_HNSW_META_EXTENSION; +import static 
org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME; +import static org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_EXTENSION; +import static org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.LUCENE99_VERSION_CURRENT; +import static org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.MIN_NUM_VECTORS_FOR_GPU_BUILD; + +/** + * Writer that builds a Nvidia Carga Graph on GPU and than writes it into the Lucene99 HNSW format, + * so that it can be searched on CPU with Lucene99HNSWVectorReader. + */ +final class GPUToHNSWVectorsWriter extends KnnVectorsWriter { + private static final Logger logger = LogManager.getLogger(GPUToHNSWVectorsWriter.class); + private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(GPUToHNSWVectorsWriter.class); + private static final int LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT = 16; + + private final CuVSResources cuVSResources; + private final SegmentWriteState segmentWriteState; + private final IndexOutput meta, vectorIndex; + private final int M; + private final int beamWidth; + private final FlatVectorsWriter flatVectorWriter; + + private final List fields = new ArrayList<>(); + private boolean finished; + + GPUToHNSWVectorsWriter(CuVSResources cuVSResources, SegmentWriteState state, int M, int beamWidth, FlatVectorsWriter flatVectorWriter) + throws IOException { + assert cuVSResources != null : "CuVSResources must not be null"; + this.cuVSResources = cuVSResources; + this.M = M; + this.flatVectorWriter = flatVectorWriter; + this.beamWidth = beamWidth; + this.segmentWriteState = state; + String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, LUCENE99_HNSW_META_EXTENSION); + String indexDataFileName = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + LUCENE99_HNSW_VECTOR_INDEX_EXTENSION + ); + boolean success = false; + try { + meta = 
state.directory.createOutput(metaFileName, state.context); + vectorIndex = state.directory.createOutput(indexDataFileName, state.context); + CodecUtil.writeIndexHeader( + meta, + LUCENE99_HNSW_META_CODEC_NAME, + LUCENE99_VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + CodecUtil.writeIndexHeader( + vectorIndex, + LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME, + LUCENE99_VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + success = true; + } finally { + if (success == false) { + org.elasticsearch.core.IOUtils.closeWhileHandlingException(this); + } + } + } + + @Override + public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException { + if (fieldInfo.getVectorEncoding().equals(VectorEncoding.FLOAT32) == false) { + throw new IllegalArgumentException( + "Field [" + fieldInfo.name + "] must have FLOAT32 encoding, got: " + fieldInfo.getVectorEncoding() + ); + } + @SuppressWarnings("unchecked") + FlatFieldVectorsWriter flatFieldWriter = (FlatFieldVectorsWriter) flatVectorWriter.addField(fieldInfo); + FieldWriter newField = new FieldWriter(flatFieldWriter, fieldInfo); + fields.add(newField); + return newField; + } + + @Override + public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { + flatVectorWriter.flush(maxDoc, sortMap); + for (FieldWriter field : fields) { + if (sortMap == null) { + writeField(field); + } else { + writeSortingField(field, sortMap); + } + } + } + + @Override + public void finish() throws IOException { + if (finished) { + throw new IllegalStateException("already finished"); + } + finished = true; + flatVectorWriter.finish(); + + if (meta != null) { + // write end of fields marker + meta.writeInt(-1); + CodecUtil.writeFooter(meta); + } + if (vectorIndex != null) { + CodecUtil.writeFooter(vectorIndex); + } + } + + @Override + public long ramBytesUsed() { + long total = SHALLOW_RAM_BYTES_USED; + for (FieldWriter field : fields) { + // the field tracks the delegate field usage + 
total += field.ramBytesUsed(); + } + return total; + } + + private void writeField(FieldWriter fieldWriter) throws IOException { + float[][] vectors = fieldWriter.flatFieldVectorsWriter.getVectors().toArray(float[][]::new); + writeFieldInternal(fieldWriter.fieldInfo, vectors); + } + + private void writeSortingField(FieldWriter fieldData, Sorter.DocMap sortMap) throws IOException { + // TODO: implement writing sorted field when we can access cagra index through MemorySegment + // as we need random access to neighbors in the graph. + throw new UnsupportedOperationException("Writing field with index sorted needs to be implemented."); + } + + private void writeFieldInternal(FieldInfo fieldInfo, float[][] vectors) throws IOException { + try { + long vectorIndexOffset = vectorIndex.getFilePointer(); + int[][] graphLevelNodeOffsets = new int[1][]; + HnswGraph mockGraph; + if (vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD) { + if (logger.isDebugEnabled()) { + logger.debug( + "Skip building carga index; vectors length {} < {} (min for GPU)", + vectors.length, + MIN_NUM_VECTORS_FOR_GPU_BUILD + ); + } + mockGraph = writeGraph(vectors, graphLevelNodeOffsets); + } else { + String tempCagraHNSWFileName = buildGPUIndex(fieldInfo.getVectorSimilarityFunction(), vectors); + assert tempCagraHNSWFileName != null : "GPU index should be built for field: " + fieldInfo.name; + mockGraph = writeGraph(tempCagraHNSWFileName, graphLevelNodeOffsets); + } + long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; + writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, vectors.length, mockGraph, graphLevelNodeOffsets); + } catch (IOException e) { + throw e; + } catch (Throwable t) { + throw new IOException("Failed to write GPU index: ", t); + } + } + + @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") + private String buildGPUIndex(VectorSimilarityFunction similarityFunction, float[][] vectors) throws Throwable { + 
CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) { + case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded; + case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct; + case COSINE -> CagraIndexParams.CuvsDistanceType.CosineExpanded; + }; + + // TODO: expose cagra index params of intermediate graph degree, graph degree, algorithm, NNDescentNumIterations + CagraIndexParams params = new CagraIndexParams.Builder().withNumWriterThreads(1) // TODO: how many CPU threads we can use? + .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.NN_DESCENT) + .withMetric(distanceType) + .build(); + + // build index on GPU + long startTime = System.nanoTime(); + var index = CagraIndex.newBuilder(cuVSResources).withDataset(vectors).withIndexParams(params).build(); + if (logger.isDebugEnabled()) { + logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, vectors.length); + } + + // TODO: do serialization through MemorySegment instead of a temp file + // serialize index for CPU consumption to the hnwslib format + startTime = System.nanoTime(); + IndexOutput tempCagraHNSW = null; + boolean success = false; + try { + tempCagraHNSW = segmentWriteState.directory.createTempOutput( + vectorIndex.getName(), + "cagra_hnws_temp", + segmentWriteState.context + ); + var tempCagraHNSWOutputStream = new IndexOutputOutputStream(tempCagraHNSW); + index.serializeToHNSW(tempCagraHNSWOutputStream); + if (logger.isDebugEnabled()) { + logger.debug("Carga index serialized to hnswlib format in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); + } + success = true; + } finally { + index.destroyIndex(); + if (success) { + org.elasticsearch.core.IOUtils.close(tempCagraHNSW); + } else { + if (tempCagraHNSW != null) { + IOUtils.closeWhileHandlingException(tempCagraHNSW); + org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, 
tempCagraHNSW.getName()); + } + } + } + return tempCagraHNSW.getName(); + } + + @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") + private HnswGraph writeGraph(String tempCagraHNSWFileName, int[][] levelNodeOffsets) throws IOException { + long startTime = System.nanoTime(); + boolean success = false; + IndexInput tempCagraHNSWInput = null; + int maxElementCount; + int maxGraphDegree; + + try { + tempCagraHNSWInput = segmentWriteState.directory.openInput(tempCagraHNSWFileName, segmentWriteState.context); + // read the metadata from the hnlswlib format; + // some of them are not used in the Lucene HNSW format + tempCagraHNSWInput.readLong(); // offSetLevel0 + maxElementCount = (int) tempCagraHNSWInput.readLong(); + tempCagraHNSWInput.readLong(); // currElementCount + tempCagraHNSWInput.readLong(); // sizeDataPerElement + long labelOffset = tempCagraHNSWInput.readLong(); + long dataOffset = tempCagraHNSWInput.readLong(); + int maxLevel = tempCagraHNSWInput.readInt(); + tempCagraHNSWInput.readInt(); // entryPointNode + tempCagraHNSWInput.readLong(); // maxM + long maxM0 = tempCagraHNSWInput.readLong(); // number of graph connections + tempCagraHNSWInput.readLong(); // M + tempCagraHNSWInput.readLong(); // mult + tempCagraHNSWInput.readLong(); // efConstruction + + assert (maxLevel == 1) : "Cagra index is flat, maxLevel must be: 1, got: " + maxLevel; + maxGraphDegree = (int) maxM0; + int[] neighbors = new int[maxGraphDegree]; + int dimension = (int) ((labelOffset - dataOffset) / Float.BYTES); + // assert (dimension == dimensionCalculated) + // : "Cagra index vector dimension must be: " + dimension + ", got: " + dimensionCalculated; + + levelNodeOffsets[0] = new int[maxElementCount]; + + // read graph from the cagra_hnswlib index and write it to the Lucene vectorIndex file + int[] scratch = new int[maxGraphDegree]; + for (int node = 0; node < maxElementCount; node++) { + // read from the cagra_hnswlib index + int 
nodeDegree = tempCagraHNSWInput.readInt(); + assert (nodeDegree == maxGraphDegree) + : "In Cagra graph all nodes must have the same number of connections : " + maxGraphDegree + ", got" + nodeDegree; + for (int i = 0; i < nodeDegree; i++) { + neighbors[i] = tempCagraHNSWInput.readInt(); + } + // Skip over the vector data + tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + dimension * Float.BYTES); + // Skip over the label/id + tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + Long.BYTES); + + // write to the Lucene vectorIndex file + long offsetStart = vectorIndex.getFilePointer(); + Arrays.sort(neighbors); + int actualSize = 0; + scratch[actualSize++] = neighbors[0]; + for (int i = 1; i < nodeDegree; i++) { + assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount; + if (neighbors[i - 1] == neighbors[i]) { + continue; + } + scratch[actualSize++] = neighbors[i] - neighbors[i - 1]; + } + // Write the size after duplicates are removed + vectorIndex.writeVInt(actualSize); + for (int i = 0; i < actualSize; i++) { + vectorIndex.writeVInt(scratch[i]); + } + levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); + } + if (logger.isDebugEnabled()) { + logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); + } + success = true; + } finally { + if (success) { + IOUtils.close(tempCagraHNSWInput); + } else { + IOUtils.closeWhileHandlingException(tempCagraHNSWInput); + } + org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSWFileName); + } + return createMockGraph(maxElementCount, maxGraphDegree); + } + + // create a graph where every node is connected to every other node + private HnswGraph writeGraph(float[][] vectors, int[][] levelNodeOffsets) throws IOException { + if (vectors.length == 0) { + return null; + } + int elementCount = vectors.length; + int nodeDegree = 
vectors.length - 1; + levelNodeOffsets[0] = new int[elementCount]; + + int[] neighbors = new int[nodeDegree]; + int[] scratch = new int[nodeDegree]; + for (int node = 0; node < elementCount; node++) { + if (nodeDegree > 0) { + for (int j = 0; j < nodeDegree; j++) { + neighbors[j] = j < node ? j : j + 1; // skip self + } + scratch[0] = neighbors[0]; + for (int i = 1; i < nodeDegree; i++) { + scratch[i] = neighbors[i] - neighbors[i - 1]; + } + } + + long offsetStart = vectorIndex.getFilePointer(); + vectorIndex.writeVInt(nodeDegree); + for (int i = 0; i < nodeDegree; i++) { + vectorIndex.writeVInt(scratch[i]); + } + levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); + } + return createMockGraph(elementCount, nodeDegree); + } + + private static HnswGraph createMockGraph(int elementCount, int graphDegree) { + return new HnswGraph() { + @Override + public int nextNeighbor() { + throw new UnsupportedOperationException("Not supported on a mock graph"); + } + + @Override + public void seek(int level, int target) { + throw new UnsupportedOperationException("Not supported on a mock graph"); + } + + @Override + public int size() { + return elementCount; + } + + @Override + public int numLevels() { + return 1; + } + + @Override + public int maxConn() { + return graphDegree; + } + + @Override + public int entryNode() { + throw new UnsupportedOperationException("Not supported on a mock graph"); + } + + @Override + public int neighborCount() { + throw new UnsupportedOperationException("Not supported on a mock graph"); + } + + @Override + public NodesIterator getNodesOnLevel(int level) { + return new ArrayNodesIterator(size()); + } + }; + } + + // TODO check with deleted documents + @Override + public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { + flatVectorWriter.mergeOneField(fieldInfo, mergeState); + FloatVectorValues vectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, 
mergeState); + // TODO: more efficient way to pass merged vector values to gpuIndex construction + KnnVectorValues.DocIndexIterator iter = vectorValues.iterator(); + List vectorList = new ArrayList<>(); + for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) { + vectorList.add(vectorValues.vectorValue(iter.index())); + } + float[][] vectors = vectorList.toArray(new float[0][]); + + writeFieldInternal(fieldInfo, vectors); + } + + private void writeMeta( + FieldInfo field, + long vectorIndexOffset, + long vectorIndexLength, + int count, + HnswGraph graph, + int[][] graphLevelNodeOffsets + ) throws IOException { + meta.writeInt(field.number); + meta.writeInt(field.getVectorEncoding().ordinal()); + meta.writeInt(distFuncToOrd(field.getVectorSimilarityFunction())); + meta.writeVLong(vectorIndexOffset); + meta.writeVLong(vectorIndexLength); + meta.writeVInt(field.getVectorDimension()); + meta.writeInt(count); + // write graph nodes on each level + if (graph == null) { + meta.writeVInt(M); + meta.writeVInt(0); + } else { + meta.writeVInt(graph.maxConn()); + meta.writeVInt(graph.numLevels()); + long valueCount = 0; + + for (int level = 0; level < graph.numLevels(); level++) { + NodesIterator nodesOnLevel = graph.getNodesOnLevel(level); + valueCount += nodesOnLevel.size(); + if (level > 0) { + int[] nol = new int[nodesOnLevel.size()]; + int numberConsumed = nodesOnLevel.consume(nol); + Arrays.sort(nol); + assert numberConsumed == nodesOnLevel.size(); + meta.writeVInt(nol.length); // number of nodes on a level + for (int i = nodesOnLevel.size() - 1; i > 0; --i) { + nol[i] -= nol[i - 1]; + } + for (int n : nol) { + assert n >= 0 : "delta encoding for nodes failed; expected nodes to be sorted"; + meta.writeVInt(n); + } + } else { + assert nodesOnLevel.size() == count : "Level 0 expects to have all nodes"; + } + } + long start = vectorIndex.getFilePointer(); + meta.writeLong(start); + meta.writeVInt(LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT); + final 
DirectMonotonicWriter memoryOffsetsWriter = DirectMonotonicWriter.getInstance( + meta, + vectorIndex, + valueCount, + LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT + ); + long cumulativeOffsetSum = 0; + for (int[] levelOffsets : graphLevelNodeOffsets) { + for (int v : levelOffsets) { + memoryOffsetsWriter.add(cumulativeOffsetSum); + cumulativeOffsetSum += v; + } + } + memoryOffsetsWriter.finish(); + meta.writeLong(vectorIndex.getFilePointer() - start); + } + } + + @Override + public void close() throws IOException { + IOUtils.close(meta, vectorIndex, flatVectorWriter); + } + + static int distFuncToOrd(VectorSimilarityFunction func) { + for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) { + if (SIMILARITY_FUNCTIONS.get(i).equals(func)) { + return (byte) i; + } + } + throw new IllegalArgumentException("invalid distance function: " + func); + } + + private static class FieldWriter extends KnnFieldVectorsWriter { + private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class); + + private final FieldInfo fieldInfo; + private int lastDocID = -1; + private final FlatFieldVectorsWriter flatFieldVectorsWriter; + + FieldWriter(FlatFieldVectorsWriter flatFieldVectorsWriter, FieldInfo fieldInfo) { + this.fieldInfo = fieldInfo; + this.flatFieldVectorsWriter = Objects.requireNonNull(flatFieldVectorsWriter); + } + + @Override + public void addValue(int docID, float[] vectorValue) throws IOException { + if (docID == lastDocID) { + throw new IllegalArgumentException( + "VectorValuesField \"" + + fieldInfo.name + + "\" appears more than once in this document (only one value is allowed per field)" + ); + } + flatFieldVectorsWriter.addValue(docID, vectorValue); + lastDocID = docID; + } + + public DocsWithFieldSet getDocsWithFieldSet() { + return flatFieldVectorsWriter.getDocsWithFieldSet(); + } + + @Override + public float[] copyValue(float[] vectorValue) { + throw new UnsupportedOperationException(); + } + + @Override + public long 
ramBytesUsed() { + return SHALLOW_SIZE + flatFieldVectorsWriter.ramBytesUsed(); + } + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java index 51664cee19ff9..756ab3a3da2e0 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java @@ -15,7 +15,7 @@ import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; -import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.elasticsearch.logging.LogManager; @@ -34,11 +34,20 @@ public class GPUVectorsFormat extends KnnVectorsFormat { public static final String NAME = "GPUVectorsFormat"; public static final String GPU_IDX_EXTENSION = "gpuidx"; public static final String GPU_META_EXTENSION = "mgpu"; - public static final int VERSION_START = 0; + + static final String LUCENE99_HNSW_META_CODEC_NAME = "Lucene99HnswVectorsFormatMeta"; + static final String LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME = "Lucene99HnswVectorsFormatIndex"; + static final String LUCENE99_HNSW_META_EXTENSION = "vem"; + static final String LUCENE99_HNSW_VECTOR_INDEX_EXTENSION = "vex"; + static final int LUCENE99_VERSION_CURRENT = VERSION_START; public static final int VERSION_CURRENT = VERSION_START; - private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat( + static final int DEFAULT_MAX_CONN = 16; + static final int DEFAULT_BEAM_WIDTH = 100; + static final int MIN_NUM_VECTORS_FOR_GPU_BUILD = 2; + + private static final FlatVectorsFormat 
flatVectorsFormat = new Lucene99FlatVectorsFormat( FlatVectorScorerUtil.getLucene99FlatVectorsScorer() ); @@ -48,12 +57,22 @@ public GPUVectorsFormat() { @Override public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new GPUVectorsWriter(state, rawVectorFormat.fieldsWriter(state)); + CuVSResources cuVSResources = cuVSResourcesOrNull(); + if (cuVSResources == null) { + throw new IllegalArgumentException("GPU based vector search is not supported on this platform or java version"); + } + return new GPUToHNSWVectorsWriter( + cuVSResources, + state, + DEFAULT_MAX_CONN, + DEFAULT_BEAM_WIDTH, + flatVectorsFormat.fieldsWriter(state) + ); } @Override public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { - return new GPUVectorsReader(state, rawVectorFormat.fieldsReader(state)); + return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state)); } @Override @@ -66,16 +85,6 @@ public String toString() { return NAME + "()"; } - static GPUVectorsReader getGPUReader(KnnVectorsReader vectorsReader, String fieldName) { - if (vectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader candidateReader) { - vectorsReader = candidateReader.getFieldReader(fieldName); - } - if (vectorsReader instanceof GPUVectorsReader reader) { - return reader; - } - return null; - } - /** Tells whether the platform supports cuvs. */ public static CuVSResources cuVSResourcesOrNull() { try { diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsReader.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsReader.java deleted file mode 100644 index ae426916efd62..0000000000000 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsReader.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.gpu.codec; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.hnsw.FlatVectorsReader; -import org.apache.lucene.index.ByteVectorValues; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.FloatVectorValues; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.VectorEncoding; -import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.lucene.internal.hppc.IntObjectHashMap; -import org.apache.lucene.search.KnnCollector; -import org.apache.lucene.store.ChecksumIndexInput; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; -import org.apache.lucene.util.hnsw.RandomVectorScorer; -import org.elasticsearch.core.IOUtils; - -import java.io.IOException; - -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; - -/** - * Reader for GPU-accelerated vectors. This reader is used to read the GPU vectors from the index. 
- */ -public class GPUVectorsReader extends KnnVectorsReader { - - private final IndexInput gpuIdx; - private final SegmentReadState state; - private final FieldInfos fieldInfos; - protected final IntObjectHashMap fields; - private final FlatVectorsReader rawVectorsReader; - - @SuppressWarnings("this-escape") - public GPUVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException { - this.state = state; - this.fieldInfos = state.fieldInfos; - this.rawVectorsReader = rawVectorsReader; - this.fields = new IntObjectHashMap<>(); - String meta = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, GPUVectorsFormat.GPU_META_EXTENSION); - - int versionMeta = -1; - boolean success = false; - try (ChecksumIndexInput gpuMeta = state.directory.openChecksumInput(meta)) { - Throwable priorE = null; - try { - versionMeta = CodecUtil.checkIndexHeader( - gpuMeta, - GPUVectorsFormat.NAME, - GPUVectorsFormat.VERSION_START, - GPUVectorsFormat.VERSION_CURRENT, - state.segmentInfo.getId(), - state.segmentSuffix - ); - readFields(gpuMeta); - } catch (Throwable exception) { - priorE = exception; - } finally { - CodecUtil.checkFooter(gpuMeta, priorE); - } - gpuIdx = openDataInput(state, versionMeta, GPUVectorsFormat.GPU_IDX_EXTENSION, GPUVectorsFormat.NAME, state.context); - success = true; - } finally { - if (success == false) { - IOUtils.closeWhileHandlingException(this); - } - } - } - - private static IndexInput openDataInput( - SegmentReadState state, - int versionMeta, - String fileExtension, - String codecName, - IOContext context - ) throws IOException { - final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension); - final IndexInput in = state.directory.openInput(fileName, context); - boolean success = false; - try { - final int versionVectorData = CodecUtil.checkIndexHeader( - in, - codecName, - GPUVectorsFormat.VERSION_START, - GPUVectorsFormat.VERSION_CURRENT, - 
state.segmentInfo.getId(), - state.segmentSuffix - ); - if (versionMeta != versionVectorData) { - throw new CorruptIndexException( - "Format versions mismatch: meta=" + versionMeta + ", " + codecName + "=" + versionVectorData, - in - ); - } - CodecUtil.retrieveChecksum(in); - success = true; - return in; - } finally { - if (success == false) { - IOUtils.closeWhileHandlingException(in); - } - } - } - - private void readFields(ChecksumIndexInput meta) throws IOException { - for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { - final FieldInfo info = fieldInfos.fieldInfo(fieldNumber); - if (info == null) { - throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta); - } - fields.put(info.number, readField(meta, info)); - } - } - - private FieldEntry readField(IndexInput input, FieldInfo info) throws IOException { - final VectorEncoding vectorEncoding = readVectorEncoding(input); - final VectorSimilarityFunction similarityFunction = readSimilarityFunction(input); - final long dataOffset = input.readLong(); - final long dataLength = input.readLong(); - - if (similarityFunction != info.getVectorSimilarityFunction()) { - throw new IllegalStateException( - "Inconsistent vector similarity function for field=\"" - + info.name - + "\"; " - + similarityFunction - + " != " - + info.getVectorSimilarityFunction() - ); - } - return new FieldEntry(similarityFunction, vectorEncoding, dataOffset, dataLength); - } - - private static VectorSimilarityFunction readSimilarityFunction(DataInput input) throws IOException { - final int i = input.readInt(); - if (i < 0 || i >= SIMILARITY_FUNCTIONS.size()) { - throw new IllegalArgumentException("invalid distance function: " + i); - } - return SIMILARITY_FUNCTIONS.get(i); - } - - private static VectorEncoding readVectorEncoding(DataInput input) throws IOException { - final int encodingId = input.readInt(); - if (encodingId < 0 || encodingId >= VectorEncoding.values().length) { - throw 
new CorruptIndexException("Invalid vector encoding id: " + encodingId, input); - } - return VectorEncoding.values()[encodingId]; - } - - @Override - public final void checkIntegrity() throws IOException { - rawVectorsReader.checkIntegrity(); - CodecUtil.checksumEntireFile(gpuIdx); - } - - @Override - public final FloatVectorValues getFloatVectorValues(String field) throws IOException { - return rawVectorsReader.getFloatVectorValues(field); - } - - @Override - public final ByteVectorValues getByteVectorValues(String field) throws IOException { - return rawVectorsReader.getByteVectorValues(field); - } - - @Override - public final void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { - // TODO: Implement GPU-accelerated search - collectAllMatchingDocs(knnCollector, acceptDocs, rawVectorsReader.getRandomVectorScorer(field, target)); - } - - private void collectAllMatchingDocs(KnnCollector knnCollector, Bits acceptDocs, RandomVectorScorer scorer) throws IOException { - OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc); - Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs); - for (int i = 0; i < scorer.maxOrd(); i++) { - if (acceptedOrds == null || acceptedOrds.get(i)) { - collector.collect(i, scorer.score(i)); - collector.incVisitedCount(1); - } - } - assert collector.earlyTerminated() == false; - } - - @Override - public final void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { - collectAllMatchingDocs(knnCollector, acceptDocs, rawVectorsReader.getRandomVectorScorer(field, target)); - } - - @Override - public void close() throws IOException { - IOUtils.close(rawVectorsReader, gpuIdx); - } - - protected record FieldEntry( - VectorSimilarityFunction similarityFunction, - VectorEncoding vectorEncoding, - long dataOffset, - long dataLength - ) { - IndexInput dataSlice(IndexInput dataFile) throws IOException { 
- return dataFile.slice("gpu-data", dataOffset, dataLength); - } - } -} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java deleted file mode 100644 index 44b24c76cabba..0000000000000 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsWriter.java +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.gpu.codec; - -import com.nvidia.cuvs.CagraIndex; -import com.nvidia.cuvs.CagraIndexParams; -import com.nvidia.cuvs.CuVSResources; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.KnnFieldVectorsWriter; -import org.apache.lucene.codecs.KnnVectorsWriter; -import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; -import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FloatVectorValues; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.KnnVectorValues; -import org.apache.lucene.index.MergeState; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.Sorter; -import org.apache.lucene.index.VectorEncoding; -import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.elasticsearch.common.lucene.store.IndexOutputOutputStream; -import org.elasticsearch.core.IOUtils; -import org.elasticsearch.core.SuppressForbidden; -import org.elasticsearch.logging.LogManager; -import org.elasticsearch.logging.Logger; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; 
- -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; -import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; - -/** - * Writer for GPU-accelerated vectors. - */ -public class GPUVectorsWriter extends KnnVectorsWriter { - private static final Logger logger = LogManager.getLogger(GPUVectorsWriter.class); - // 2 for now based on https://github.com/rapidsai/cuvs/issues/666, but can be increased later - private static final int MIN_NUM_VECTORS_FOR_GPU_BUILD = 2; - - private final List fieldWriters = new ArrayList<>(); - private final IndexOutput gpuIdx; - private final IndexOutput gpuMeta; - private final FlatVectorsWriter rawVectorDelegate; - private final SegmentWriteState segmentWriteState; - private final CuVSResources cuVSResources; - - @SuppressWarnings("this-escape") - public GPUVectorsWriter(SegmentWriteState state, FlatVectorsWriter rawVectorDelegate) throws IOException { - this.cuVSResources = GPUVectorsFormat.cuVSResourcesOrNull(); - if (cuVSResources == null) { - throw new IllegalArgumentException("GPU based vector search is not supported on this platform or java version"); - } - this.segmentWriteState = state; - this.rawVectorDelegate = rawVectorDelegate; - final String metaFileName = IndexFileNames.segmentFileName( - state.segmentInfo.name, - state.segmentSuffix, - GPUVectorsFormat.GPU_META_EXTENSION - ); - - final String gpuIdxFileName = IndexFileNames.segmentFileName( - state.segmentInfo.name, - state.segmentSuffix, - GPUVectorsFormat.GPU_IDX_EXTENSION - ); - boolean success = false; - try { - gpuMeta = state.directory.createOutput(metaFileName, state.context); - CodecUtil.writeIndexHeader( - gpuMeta, - GPUVectorsFormat.NAME, - GPUVectorsFormat.VERSION_CURRENT, - state.segmentInfo.getId(), - state.segmentSuffix - ); - gpuIdx = state.directory.createOutput(gpuIdxFileName, state.context); - CodecUtil.writeIndexHeader( - gpuIdx, - GPUVectorsFormat.NAME, - GPUVectorsFormat.VERSION_CURRENT, - 
state.segmentInfo.getId(), - state.segmentSuffix - ); - success = true; - } finally { - if (success == false) { - IOUtils.closeWhileHandlingException(this); - } - } - } - - @Override - public final KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException { - final FlatFieldVectorsWriter rawVectorDelegate = this.rawVectorDelegate.addField(fieldInfo); - if (fieldInfo.getVectorEncoding().equals(VectorEncoding.FLOAT32)) { - @SuppressWarnings("unchecked") - final FlatFieldVectorsWriter floatWriter = (FlatFieldVectorsWriter) rawVectorDelegate; - fieldWriters.add(new FieldWriter(fieldInfo, floatWriter)); - } - return rawVectorDelegate; - } - - @Override - public final void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { - rawVectorDelegate.flush(maxDoc, sortMap); - // TODO: implement the case when sortMap != null - - for (FieldWriter fieldWriter : fieldWriters) { - // TODO: can we use MemorySegment instead of passing array of vectors - float[][] vectors = fieldWriter.delegate.getVectors().toArray(float[][]::new); - long dataOffset = gpuIdx.alignFilePointer(Float.BYTES); - try { - buildAndwriteGPUIndex(fieldWriter.fieldInfo.getVectorSimilarityFunction(), vectors); - long dataLength = gpuIdx.getFilePointer() - dataOffset; - writeMeta(fieldWriter.fieldInfo, dataOffset, dataLength); - } catch (IOException e) { - throw e; - } catch (Throwable t) { - throw new IOException("Failed to write GPU index: ", t); - } - } - } - - @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") - private void buildAndwriteGPUIndex(VectorSimilarityFunction similarityFunction, float[][] vectors) throws Throwable { - // TODO: should we Lucene HNSW index write here - if (vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD) { - if (logger.isDebugEnabled()) { - logger.debug("Skip building carga index; vectors length {} < {}", vectors.length, MIN_NUM_VECTORS_FOR_GPU_BUILD); - } - return; - } - - int dimension = vectors[0].length; - 
CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) { - case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded; - case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct; - case COSINE -> CagraIndexParams.CuvsDistanceType.CosineExpanded; - }; - - // TODO: expose cagra index params of intermediate graph degree, graph degre, algorithm, NNDescentNumIterations - CagraIndexParams params = new CagraIndexParams.Builder().withNumWriterThreads(1) // TODO: how many CPU threads we can use? - .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.NN_DESCENT) - .withMetric(distanceType) - .build(); - - // build index on GPU - long startTime = System.nanoTime(); - var index = CagraIndex.newBuilder(cuVSResources).withDataset(vectors).withIndexParams(params).build(); - if (logger.isDebugEnabled()) { - logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, vectors.length); - } - - // TODO: do serialization through MemorySegment instead of a temp file - // serialize index for CPU consumption to hnwslib format - startTime = System.nanoTime(); - IndexOutput tempCagraHNSW = null; - boolean success = false; - try { - tempCagraHNSW = segmentWriteState.directory.createTempOutput(gpuIdx.getName(), "cagra_hnws_temp", segmentWriteState.context); - var tempCagraHNSWOutputStream = new IndexOutputOutputStream(tempCagraHNSW); - index.serializeToHNSW(tempCagraHNSWOutputStream); - success = true; - if (logger.isDebugEnabled()) { - logger.debug("Carga index serialized to hnswlib format in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); - } - } finally { - index.destroyIndex(); - if (success) { - IOUtils.close(tempCagraHNSW); - } else { - IOUtils.closeWhileHandlingException(tempCagraHNSW); - if (tempCagraHNSW != null) { - org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName()); - } - } - } - - // convert 
hnswlib format to Lucene HNSW format - startTime = System.nanoTime(); - success = false; - IndexInput tempCagraHNSWInput = null; - try { - tempCagraHNSWInput = segmentWriteState.directory.openInput(tempCagraHNSW.getName(), segmentWriteState.context); - // read the metadata from the hnlswlib format - // some of them are not used in Lucene HNSW format - tempCagraHNSWInput.readLong(); // offSetLevel0 - long maxElementCount = tempCagraHNSWInput.readLong(); - tempCagraHNSWInput.readLong(); // currElementCount - long sizeDataPerElement = tempCagraHNSWInput.readLong(); - long labelOffset = tempCagraHNSWInput.readLong(); - long dataOffset = tempCagraHNSWInput.readLong(); - int maxLevel = tempCagraHNSWInput.readInt(); - tempCagraHNSWInput.readInt(); // entryPointNode - tempCagraHNSWInput.readLong(); // maxM - long maxM0 = tempCagraHNSWInput.readLong(); // number of graph connections - tempCagraHNSWInput.readLong(); // M - tempCagraHNSWInput.readLong(); // mult - tempCagraHNSWInput.readLong(); // efConstruction - - assert (maxLevel == 1) : "Cagra index is flat, maxLevel must be: 1, got: " + maxLevel; - int maxGraphDegree = (int) maxM0; - int[] connections = new int[maxGraphDegree]; - int dimensionCalculated = (int) ((labelOffset - dataOffset) / Float.BYTES); - assert (dimension == dimensionCalculated) - : "Cagra index vector dimension must be: " + dimension + ", got: " + dimensionCalculated; - - // read graph from the cagra_hnswlib index and write it to the Lucene HNSW format - gpuIdx.writeInt((int) maxElementCount); - gpuIdx.writeInt((int) maxM0); - for (int i = 0; i < maxElementCount; i++) { - // read from the cagra_hnswlib index - int graphDegree = tempCagraHNSWInput.readInt(); - assert (graphDegree == maxGraphDegree) - : "In Cagra graph all nodes must have the same number of connections : " + maxGraphDegree + ", got" + graphDegree; - for (int j = 0; j < graphDegree; j++) { - connections[j] = tempCagraHNSWInput.readInt(); - } - // Skip over the vector data - 
tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + dimension * Float.BYTES); - // Skip over the label/id - tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + Long.BYTES); - - // write graph - gpuIdx.writeVInt(graphDegree); - for (int neighbor : connections) { - gpuIdx.writeVInt(neighbor); - } - } - - success = true; - if (logger.isDebugEnabled()) { - logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); - } - } finally { - if (success) { - IOUtils.close(tempCagraHNSWInput); - } else { - IOUtils.closeWhileHandlingException(tempCagraHNSWInput); - } - if (tempCagraHNSW != null) { - org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName()); - } - } - } - - @Override - public final void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { - if (fieldInfo.getVectorEncoding().equals(VectorEncoding.FLOAT32)) { - rawVectorDelegate.mergeOneField(fieldInfo, mergeState); - FloatVectorValues vectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); - // TODO: more efficient way to pass merged vector values to gpuIndex construction - KnnVectorValues.DocIndexIterator iter = vectorValues.iterator(); - List vectorList = new ArrayList<>(); - for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) { - vectorList.add(vectorValues.vectorValue(iter.index())); - } - float[][] vectors = vectorList.toArray(new float[0][]); - - long dataOffset = gpuIdx.alignFilePointer(Float.BYTES); - try { - buildAndwriteGPUIndex(fieldInfo.getVectorSimilarityFunction(), vectors); - long dataLength = gpuIdx.getFilePointer() - dataOffset; - writeMeta(fieldInfo, dataOffset, dataLength); - } catch (IOException e) { - throw e; - } catch (Throwable t) { - throw new IOException("Failed to write GPU index: ", t); - } - } else { - rawVectorDelegate.mergeOneField(fieldInfo, mergeState); - } 
- } - - private void writeMeta(FieldInfo field, long dataOffset, long dataLength) throws IOException { - gpuMeta.writeInt(field.number); - gpuMeta.writeInt(field.getVectorEncoding().ordinal()); - gpuMeta.writeInt(distFuncToOrd(field.getVectorSimilarityFunction())); - gpuMeta.writeLong(dataOffset); - gpuMeta.writeLong(dataLength); - } - - private static int distFuncToOrd(VectorSimilarityFunction func) { - for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) { - if (SIMILARITY_FUNCTIONS.get(i).equals(func)) { - return (byte) i; - } - } - throw new IllegalArgumentException("invalid distance function: " + func); - } - - @Override - public final void finish() throws IOException { - rawVectorDelegate.finish(); - if (gpuMeta != null) { - // write end of fields marker - gpuMeta.writeInt(-1); - CodecUtil.writeFooter(gpuMeta); - } - if (gpuIdx != null) { - CodecUtil.writeFooter(gpuIdx); - } - } - - @Override - public final void close() throws IOException { - IOUtils.close(rawVectorDelegate, gpuMeta, gpuIdx); - cuVSResources.close(); - } - - @Override - public final long ramBytesUsed() { - return rawVectorDelegate.ramBytesUsed(); - } - - private record FieldWriter(FieldInfo fieldInfo, FlatFieldVectorsWriter delegate) {} -} diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java index d49fda94b1915..34b99da255bc7 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java @@ -46,13 +46,48 @@ protected VectorEncoding randomVectorEncoding() { } @Override - public void testSearchWithVisitedLimit() { - // TODO + public void testRandomBytes() throws Exception { + // No bytes support } @Override - public void testAdvance() throws Exception { - // TODO + public void testSortedIndexBytes() throws 
Exception { + // No bytes support + } + + @Override + public void testByteVectorScorerIteration() throws Exception { + // No bytes support + } + + @Override + public void testEmptyByteVectorData() throws Exception { + // No bytes support + } + + @Override + public void testMergingWithDifferentByteKnnFields() throws Exception { + // No bytes support + } + + @Override + public void testMismatchedFields() throws Exception { + // No bytes support + } + + @Override + public void testSortedIndex() throws Exception { + // TODO: implement sorted index + } + + @Override + public void testFloatVectorScorerIteration() throws Exception { + // TODO: implement sorted index + } + + @Override + public void testRandom() throws Exception { + // TODO: implement sorted index } public void testToString() { @@ -66,8 +101,4 @@ public KnnVectorsFormat knnVectorsFormat() { assertEquals(expectedPattern, customCodec.knnVectorsFormat().toString()); } - @Override - public void testSortedIndexBytes() throws Exception { - super.testSortedIndexBytes(); - } } From 4826da62ba582801a1c5d45dc786876c35c8b090 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 16 Jul 2025 15:01:44 -0400 Subject: [PATCH 013/109] Enable GPU indexing test in KnnIndexTester --- qa/vector/build.gradle | 7 +++++++ qa/vector/src/main/java/module-info.java | 1 + .../org/elasticsearch/test/knn/KnnIndexTester.java | 8 +++++++- x-pack/plugin/gpu/src/main/java/module-info.java | 2 ++ .../xpack/gpu/codec/GPUVectorsFormat.java | 12 +++++++----- 5 files changed, 24 insertions(+), 6 deletions(-) diff --git a/qa/vector/build.gradle b/qa/vector/build.gradle index 951efabedbae4..609ea17e0e5d9 100644 --- a/qa/vector/build.gradle +++ b/qa/vector/build.gradle @@ -20,6 +20,9 @@ tasks.named("dependencyLicenses").configure { tasks.named('forbiddenApisMain').configure { enabled = false } +repositories { + mavenLocal() +} dependencies { api "org.apache.lucene:lucene-core:${versions.lucene}" @@ -29,6 +32,7 @@ dependencies { implementation 
project(':libs:native') implementation project(':libs:logging') implementation project(':server') + implementation project(':x-pack:plugin:gpu') } /** * Task to run the KnnIndexTester with the provided parameters. @@ -42,6 +46,9 @@ tasks.register("checkVec", JavaExec) { systemProperty "es.logger.out", "console" systemProperty "es.logger.level", "INFO" // Change to DEBUG if needed systemProperty 'es.nativelibs.path', TestUtil.getTestLibraryPath(file("../../libs/native/libraries/build/platform/").toString()) + javaLauncher = project.javaToolchains.launcherFor { + languageVersion = JavaLanguageVersion.of(24) + } jvmArgs '-Xms4g', '-Xmx4g', '-Djava.util.concurrent.ForkJoinPool.common.parallelism=8', '-XX:+UnlockDiagnosticVMOptions', '-XX:+DebugNonSafepoints', '-XX:+HeapDumpOnOutOfMemoryError' if (buildParams.getRuntimeJavaVersion().map { it.majorVersion.toInteger() }.get() >= 21) { jvmArgs '--add-modules=jdk.incubator.vector', '--enable-native-access=ALL-UNNAMED' diff --git a/qa/vector/src/main/java/module-info.java b/qa/vector/src/main/java/module-info.java index b6647aafeb01f..0bcb7bc98b651 100644 --- a/qa/vector/src/main/java/module-info.java +++ b/qa/vector/src/main/java/module-info.java @@ -18,4 +18,5 @@ requires org.elasticsearch.logging; requires java.management; requires jdk.management; + requires org.elasticsearch.gpu; } diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java index dcce1fd304b06..a36e547076c2e 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java @@ -31,6 +31,7 @@ import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; import java.io.InputStream; import 
java.lang.management.ThreadInfo; @@ -66,13 +67,16 @@ public class KnnIndexTester { enum IndexType { HNSW, FLAT, - IVF + IVF, + GPU } private static String formatIndexPath(CmdLineArgs args) { List suffix = new ArrayList<>(); if (args.indexType() == IndexType.FLAT) { suffix.add("flat"); + } else if (args.indexType() == IndexType.GPU) { + suffix.add("gpu"); } else if (args.indexType() == IndexType.IVF) { suffix.add("ivf"); suffix.add(Integer.toString(args.ivfClusterSize())); @@ -90,6 +94,8 @@ static Codec createCodec(CmdLineArgs args) { final KnnVectorsFormat format; if (args.indexType() == IndexType.IVF) { format = new IVFVectorsFormat(args.ivfClusterSize()); + } else if (args.indexType() == IndexType.GPU) { + format = new GPUVectorsFormat(); } else { if (args.quantizeBits() == 1) { if (args.indexType() == IndexType.FLAT) { diff --git a/x-pack/plugin/gpu/src/main/java/module-info.java b/x-pack/plugin/gpu/src/main/java/module-info.java index d3c8616688bf9..5cfb1f3669798 100644 --- a/x-pack/plugin/gpu/src/main/java/module-info.java +++ b/x-pack/plugin/gpu/src/main/java/module-info.java @@ -14,5 +14,7 @@ requires org.elasticsearch.base; requires com.nvidia.cuvs; + exports org.elasticsearch.xpack.gpu.codec; + provides org.apache.lucene.codecs.KnnVectorsFormat with org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java index 756ab3a3da2e0..c54d76f880678 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java @@ -32,8 +32,6 @@ public class GPUVectorsFormat extends KnnVectorsFormat { private static final Logger LOG = LogManager.getLogger(GPUVectorsFormat.class); public static final String NAME = "GPUVectorsFormat"; - public static final String 
GPU_IDX_EXTENSION = "gpuidx"; - public static final String GPU_META_EXTENSION = "mgpu"; public static final int VERSION_START = 0; static final String LUCENE99_HNSW_META_CODEC_NAME = "Lucene99HnswVectorsFormatMeta"; @@ -41,7 +39,6 @@ public class GPUVectorsFormat extends KnnVectorsFormat { static final String LUCENE99_HNSW_META_EXTENSION = "vem"; static final String LUCENE99_HNSW_VECTOR_INDEX_EXTENSION = "vex"; static final int LUCENE99_VERSION_CURRENT = VERSION_START; - public static final int VERSION_CURRENT = VERSION_START; static final int DEFAULT_MAX_CONN = 16; static final int DEFAULT_BEAM_WIDTH = 100; @@ -91,8 +88,13 @@ public static CuVSResources cuVSResourcesOrNull() { var resources = CuVSResources.create(); return resources; } catch (UnsupportedOperationException uoe) { - var msg = uoe.getMessage() == null ? "" : ": " + uoe.getMessage(); - LOG.warn("GPU based vector search is not supported on this platform or java version" + msg); + String msg = ""; + if (uoe.getMessage() == null) { + msg = "Runtime Java version: " + Runtime.version().feature(); + } else { + msg = ": " + uoe.getMessage(); + } + LOG.warn("GPU based vector search is not supported on this platform or java version; " + msg); } catch (Throwable t) { if (t instanceof ExceptionInInitializerError ex) { t = ex.getCause(); From 7c85493348e1c15822ebc7a8fb6411d46a99c75c Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 18 Jul 2025 15:25:41 -0400 Subject: [PATCH 014/109] Fix merging in GPU index writer --- .../gpu/codec/GPUToHNSWVectorsWriter.java | 149 ++++++++++++++++-- 1 file changed, 134 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java index 6b83934783cc0..3e5a17d17505f 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ 
b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -10,6 +10,7 @@ import com.nvidia.cuvs.CagraIndex; import com.nvidia.cuvs.CagraIndexParams; import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.Dataset; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnFieldVectorsWriter; @@ -26,6 +27,7 @@ import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.RamUsageEstimator; @@ -39,6 +41,9 @@ import org.elasticsearch.logging.Logger; import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -166,9 +171,46 @@ public long ramBytesUsed() { return total; } + private static final class DatasetOrVectors { + private final Dataset dataset; + private final float[][] vectors; + + DatasetOrVectors(float[][] vectors) { + this( + vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : Dataset.ofArray(vectors), + vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? vectors : null + ); + validateState(); + } + + private DatasetOrVectors(Dataset dataset, float[][] vectors) { + this.dataset = dataset; + this.vectors = vectors; + validateState(); + } + + private void validateState() { + if ((dataset == null && vectors == null) || (dataset != null && vectors != null)) { + throw new IllegalStateException("Exactly one of dataset or vectors must be non-null"); + } + } + + int size() { + return dataset != null ? 
dataset.size() : vectors.length; + } + + Dataset getDataset() { + return dataset; + } + + float[][] getVectors() { + return vectors; + } + } + private void writeField(FieldWriter fieldWriter) throws IOException { float[][] vectors = fieldWriter.flatFieldVectorsWriter.getVectors().toArray(float[][]::new); - writeFieldInternal(fieldWriter.fieldInfo, vectors); + writeFieldInternal(fieldWriter.fieldInfo, new DatasetOrVectors(vectors)); } private void writeSortingField(FieldWriter fieldData, Sorter.DocMap sortMap) throws IOException { @@ -177,12 +219,13 @@ private void writeSortingField(FieldWriter fieldData, Sorter.DocMap sortMap) thr throw new UnsupportedOperationException("Writing field with index sorted needs to be implemented."); } - private void writeFieldInternal(FieldInfo fieldInfo, float[][] vectors) throws IOException { + private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrVectors) throws IOException { try { long vectorIndexOffset = vectorIndex.getFilePointer(); int[][] graphLevelNodeOffsets = new int[1][]; HnswGraph mockGraph; - if (vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD) { + if (datasetOrVectors.vectors != null) { + float[][] vectors = datasetOrVectors.vectors; if (logger.isDebugEnabled()) { logger.debug( "Skip building carga index; vectors length {} < {} (min for GPU)", @@ -192,12 +235,12 @@ private void writeFieldInternal(FieldInfo fieldInfo, float[][] vectors) throws I } mockGraph = writeGraph(vectors, graphLevelNodeOffsets); } else { - String tempCagraHNSWFileName = buildGPUIndex(fieldInfo.getVectorSimilarityFunction(), vectors); + String tempCagraHNSWFileName = buildGPUIndex(fieldInfo.getVectorSimilarityFunction(), datasetOrVectors.dataset); assert tempCagraHNSWFileName != null : "GPU index should be built for field: " + fieldInfo.name; mockGraph = writeGraph(tempCagraHNSWFileName, graphLevelNodeOffsets); } long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; - writeMeta(fieldInfo, 
vectorIndexOffset, vectorIndexLength, vectors.length, mockGraph, graphLevelNodeOffsets); + writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetOrVectors.size(), mockGraph, graphLevelNodeOffsets); } catch (IOException e) { throw e; } catch (Throwable t) { @@ -206,7 +249,7 @@ private void writeFieldInternal(FieldInfo fieldInfo, float[][] vectors) throws I } @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") - private String buildGPUIndex(VectorSimilarityFunction similarityFunction, float[][] vectors) throws Throwable { + private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Dataset dataset) throws Throwable { CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) { case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded; case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct; @@ -221,9 +264,9 @@ private String buildGPUIndex(VectorSimilarityFunction similarityFunction, float[ // build index on GPU long startTime = System.nanoTime(); - var index = CagraIndex.newBuilder(cuVSResources).withDataset(vectors).withIndexParams(params).build(); + var index = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params).build(); if (logger.isDebugEnabled()) { - logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, vectors.length); + logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, dataset.size()); } // TODO: do serialization through MemorySegment instead of a temp file @@ -419,18 +462,94 @@ public NodesIterator getNodesOnLevel(int level) { // TODO check with deleted documents @Override + @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { 
flatVectorWriter.mergeOneField(fieldInfo, mergeState); FloatVectorValues vectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); - // TODO: more efficient way to pass merged vector values to gpuIndex construction - KnnVectorValues.DocIndexIterator iter = vectorValues.iterator(); - List vectorList = new ArrayList<>(); - for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) { - vectorList.add(vectorValues.vectorValue(iter.index())); + // save merged vector values to a temp file + final int numVectors; + String tempRawVectorsFileName = null; + boolean success = false; + try (IndexOutput out = mergeState.segmentInfo.dir.createTempOutput(mergeState.segmentInfo.name, "vec_", IOContext.DEFAULT)) { + tempRawVectorsFileName = out.getName(); + numVectors = writeFloatVectorValues(fieldInfo, out, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState)); + CodecUtil.writeFooter(out); + success = true; + } finally { + if (success == false && tempRawVectorsFileName != null) { + org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); + } + } + try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { + // TODO: Improve this (not acceptable): pass tempRawVectorsFileName for the gpuIndex construction through MemorySegment + final FloatVectorValues floatVectorValues = getFloatVectorValues(fieldInfo, in, numVectors); + float[][] vectors = new float[numVectors][fieldInfo.getVectorDimension()]; + float[] vector; + for (int i = 0; i < numVectors; i++) { + vector = floatVectorValues.vectorValue(i); + System.arraycopy(vector, 0, vectors[i], 0, vector.length); + } + DatasetOrVectors datasetOrVectors = new DatasetOrVectors(vectors); + writeFieldInternal(fieldInfo, datasetOrVectors); + } finally { + org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); } - 
float[][] vectors = vectorList.toArray(new float[0][]); + } + + private static int writeFloatVectorValues(FieldInfo fieldInfo, IndexOutput out, FloatVectorValues floatVectorValues) + throws IOException { + int numVectors = 0; + final ByteBuffer buffer = ByteBuffer.allocate(fieldInfo.getVectorDimension() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN); + final KnnVectorValues.DocIndexIterator iterator = floatVectorValues.iterator(); + for (int docV = iterator.nextDoc(); docV != NO_MORE_DOCS; docV = iterator.nextDoc()) { + numVectors++; + float[] vector = floatVectorValues.vectorValue(iterator.index()); + out.writeInt(iterator.docID()); + buffer.asFloatBuffer().put(vector); + out.writeBytes(buffer.array(), buffer.array().length); + } + return numVectors; + } + + private static FloatVectorValues getFloatVectorValues(FieldInfo fieldInfo, IndexInput randomAccessInput, int numVectors) { + if (numVectors == 0) { + return FloatVectorValues.fromFloats(List.of(), fieldInfo.getVectorDimension()); + } + final long length = (long) Float.BYTES * fieldInfo.getVectorDimension() + Integer.BYTES; + final float[] vector = new float[fieldInfo.getVectorDimension()]; + return new FloatVectorValues() { + @Override + public float[] vectorValue(int ord) throws IOException { + randomAccessInput.seek(ord * length + Integer.BYTES); + randomAccessInput.readFloats(vector, 0, vector.length); + return vector; + } + + @Override + public FloatVectorValues copy() { + return this; + } - writeFieldInternal(fieldInfo, vectors); + @Override + public int dimension() { + return fieldInfo.getVectorDimension(); + } + + @Override + public int size() { + return numVectors; + } + + @Override + public int ordToDoc(int ord) { + try { + randomAccessInput.seek(ord * length); + return randomAccessInput.readInt(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + }; } private void writeMeta( From ec3330ede09bf2725d5b967cbfde521d32eff2ab Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Sat, 
19 Jul 2025 14:00:23 -0400 Subject: [PATCH 015/109] Attempt to use Java 22 DatasetImpl with MemorySegment --- .../gpu/codec/GPUToHNSWVectorsWriter.java | 94 ++++++++----------- 1 file changed, 39 insertions(+), 55 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java index 3e5a17d17505f..0a9e871cb62f4 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -42,8 +42,13 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -460,19 +465,36 @@ public NodesIterator getNodesOnLevel(int level) { }; } - // TODO check with deleted documents @Override @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { + int dims = fieldInfo.getVectorDimension(); flatVectorWriter.mergeOneField(fieldInfo, mergeState); - FloatVectorValues vectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); - // save merged vector values to a temp file + FloatVectorValues mergeFloatVectorValues = MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); + + if (mergeFloatVectorValues.size() < MIN_NUM_VECTORS_FOR_GPU_BUILD) { + // TODO: check how deleted documents affect size value + KnnVectorValues.DocIndexIterator iter = mergeFloatVectorValues.iterator(); + float[] vector = new 
float[dims]; + List vectorsList = new ArrayList<>(); + for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) { + System.arraycopy(mergeFloatVectorValues.vectorValue(iter.index()), 0, vector, 0, dims); + vectorsList.add(vector); + } + float[][] vectors = vectorsList.toArray(new float[0][]); + DatasetOrVectors datasetOrVectors = new DatasetOrVectors(vectors); + writeFieldInternal(fieldInfo, datasetOrVectors); + return; + } + + final int numVectors; String tempRawVectorsFileName = null; boolean success = false; + // save merged vectors to a temporary file try (IndexOutput out = mergeState.segmentInfo.dir.createTempOutput(mergeState.segmentInfo.name, "vec_", IOContext.DEFAULT)) { tempRawVectorsFileName = out.getName(); - numVectors = writeFloatVectorValues(fieldInfo, out, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState)); + numVectors = writeFloatVectorValues(fieldInfo, out, mergeFloatVectorValues); CodecUtil.writeFooter(out); success = true; } finally { @@ -480,16 +502,19 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); } } - try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { - // TODO: Improve this (not acceptable): pass tempRawVectorsFileName for the gpuIndex construction through MemorySegment - final FloatVectorValues floatVectorValues = getFloatVectorValues(fieldInfo, in, numVectors); - float[][] vectors = new float[numVectors][fieldInfo.getVectorDimension()]; - float[] vector; - for (int i = 0; i < numVectors; i++) { - vector = floatVectorValues.vectorValue(i); - System.arraycopy(vector, 0, vectors[i], 0, vector.length); - } - DatasetOrVectors datasetOrVectors = new DatasetOrVectors(vectors); + // Use MemorySegment to map the temp file and pass it as a dataset for building the GPU index + try { + final Path path = 
((org.apache.lucene.store.FSDirectory) mergeState.segmentInfo.dir).getDirectory().resolve(tempRawVectorsFileName); + Arena arena = Arena.ofShared(); + FileChannel fileChannel = FileChannel.open(path, StandardOpenOption.READ); + final MemorySegment memorySegment = fileChannel.map( + FileChannel.MapMode.READ_ONLY, + 0, + fileChannel.size() - CodecUtil.footerLength(), + arena + ); + Dataset dataset = new DatasetImpl(arena, memorySegment, numVectors, fieldInfo.getVectorDimension()); + DatasetOrVectors datasetOrVectors = new DatasetOrVectors(dataset, null); writeFieldInternal(fieldInfo, datasetOrVectors); } finally { org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); @@ -511,47 +536,6 @@ private static int writeFloatVectorValues(FieldInfo fieldInfo, IndexOutput out, return numVectors; } - private static FloatVectorValues getFloatVectorValues(FieldInfo fieldInfo, IndexInput randomAccessInput, int numVectors) { - if (numVectors == 0) { - return FloatVectorValues.fromFloats(List.of(), fieldInfo.getVectorDimension()); - } - final long length = (long) Float.BYTES * fieldInfo.getVectorDimension() + Integer.BYTES; - final float[] vector = new float[fieldInfo.getVectorDimension()]; - return new FloatVectorValues() { - @Override - public float[] vectorValue(int ord) throws IOException { - randomAccessInput.seek(ord * length + Integer.BYTES); - randomAccessInput.readFloats(vector, 0, vector.length); - return vector; - } - - @Override - public FloatVectorValues copy() { - return this; - } - - @Override - public int dimension() { - return fieldInfo.getVectorDimension(); - } - - @Override - public int size() { - return numVectors; - } - - @Override - public int ordToDoc(int ord) { - try { - randomAccessInput.seek(ord * length); - return randomAccessInput.readInt(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - }; - } - private void writeMeta( FieldInfo field, long vectorIndexOffset, From 
723bb5a565c3da0290360e8641141b2f67849b5f Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 22 Jul 2025 13:47:46 -0400 Subject: [PATCH 016/109] Revert "Attempt to use Java 22 DatasetImpl with MemorySegment" This reverts commit ec3330ede09bf2725d5b967cbfde521d32eff2ab. --- .../gpu/codec/GPUToHNSWVectorsWriter.java | 94 +++++++++++-------- 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java index 0a9e871cb62f4..3e5a17d17505f 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -42,13 +42,8 @@ import java.io.IOException; import java.io.UncheckedIOException; -import java.lang.foreign.Arena; -import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.nio.channels.FileChannel; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -465,36 +460,19 @@ public NodesIterator getNodesOnLevel(int level) { }; } + // TODO check with deleted documents @Override @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { - int dims = fieldInfo.getVectorDimension(); flatVectorWriter.mergeOneField(fieldInfo, mergeState); - FloatVectorValues mergeFloatVectorValues = MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); - - if (mergeFloatVectorValues.size() < MIN_NUM_VECTORS_FOR_GPU_BUILD) { - // TODO: check how deleted documents affect size value - KnnVectorValues.DocIndexIterator iter = mergeFloatVectorValues.iterator(); - float[] vector = new 
float[dims]; - List vectorsList = new ArrayList<>(); - for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) { - System.arraycopy(mergeFloatVectorValues.vectorValue(iter.index()), 0, vector, 0, dims); - vectorsList.add(vector); - } - float[][] vectors = vectorsList.toArray(new float[0][]); - DatasetOrVectors datasetOrVectors = new DatasetOrVectors(vectors); - writeFieldInternal(fieldInfo, datasetOrVectors); - return; - } - - + FloatVectorValues vectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); + // save merged vector values to a temp file final int numVectors; String tempRawVectorsFileName = null; boolean success = false; - // save merged vectors to a temporary file try (IndexOutput out = mergeState.segmentInfo.dir.createTempOutput(mergeState.segmentInfo.name, "vec_", IOContext.DEFAULT)) { tempRawVectorsFileName = out.getName(); - numVectors = writeFloatVectorValues(fieldInfo, out, mergeFloatVectorValues); + numVectors = writeFloatVectorValues(fieldInfo, out, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState)); CodecUtil.writeFooter(out); success = true; } finally { @@ -502,19 +480,16 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); } } - // Use MemorySegment to map the temp file and pass it as a dataset for building the GPU index - try { - final Path path = ((org.apache.lucene.store.FSDirectory) mergeState.segmentInfo.dir).getDirectory().resolve(tempRawVectorsFileName); - Arena arena = Arena.ofShared(); - FileChannel fileChannel = FileChannel.open(path, StandardOpenOption.READ); - final MemorySegment memorySegment = fileChannel.map( - FileChannel.MapMode.READ_ONLY, - 0, - fileChannel.size() - CodecUtil.footerLength(), - arena - ); - Dataset dataset = new DatasetImpl(arena, memorySegment, numVectors, fieldInfo.getVectorDimension()); - 
DatasetOrVectors datasetOrVectors = new DatasetOrVectors(dataset, null); + try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { + // TODO: Improve this (not acceptable): pass tempRawVectorsFileName for the gpuIndex construction through MemorySegment + final FloatVectorValues floatVectorValues = getFloatVectorValues(fieldInfo, in, numVectors); + float[][] vectors = new float[numVectors][fieldInfo.getVectorDimension()]; + float[] vector; + for (int i = 0; i < numVectors; i++) { + vector = floatVectorValues.vectorValue(i); + System.arraycopy(vector, 0, vectors[i], 0, vector.length); + } + DatasetOrVectors datasetOrVectors = new DatasetOrVectors(vectors); writeFieldInternal(fieldInfo, datasetOrVectors); } finally { org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); @@ -536,6 +511,47 @@ private static int writeFloatVectorValues(FieldInfo fieldInfo, IndexOutput out, return numVectors; } + private static FloatVectorValues getFloatVectorValues(FieldInfo fieldInfo, IndexInput randomAccessInput, int numVectors) { + if (numVectors == 0) { + return FloatVectorValues.fromFloats(List.of(), fieldInfo.getVectorDimension()); + } + final long length = (long) Float.BYTES * fieldInfo.getVectorDimension() + Integer.BYTES; + final float[] vector = new float[fieldInfo.getVectorDimension()]; + return new FloatVectorValues() { + @Override + public float[] vectorValue(int ord) throws IOException { + randomAccessInput.seek(ord * length + Integer.BYTES); + randomAccessInput.readFloats(vector, 0, vector.length); + return vector; + } + + @Override + public FloatVectorValues copy() { + return this; + } + + @Override + public int dimension() { + return fieldInfo.getVectorDimension(); + } + + @Override + public int size() { + return numVectors; + } + + @Override + public int ordToDoc(int ord) { + try { + randomAccessInput.seek(ord * length); + return randomAccessInput.readInt(); + } 
catch (IOException e) { + throw new UncheckedIOException(e); + } + } + }; + } + private void writeMeta( FieldInfo field, long vectorIndexOffset, From 0ee27d9df8c9d07726c2116876103ac00d19416b Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 22 Jul 2025 19:19:14 -0400 Subject: [PATCH 017/109] Fix spotless --- .../index/mapper/AbstractDenseVectorFieldMapperTestcase.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java index e8a630b4f5d9b..4a78b61c37d31 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java @@ -9,7 +9,6 @@ package org.elasticsearch.index.mapper; - import org.apache.lucene.search.FieldExistsQuery; import org.apache.lucene.search.Query; import org.elasticsearch.index.IndexVersion; @@ -1286,7 +1285,6 @@ protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneD @Override public void testAggregatableConsistency() {} - @Override protected void assertFetchMany(MapperService mapperService, String field, Object value, String format, int count) throws IOException { assumeFalse("Dense vectors currently don't support multiple values in the same field", false); From d27b62f65958000c44ce0fd4f9b04c032352517d Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Thu, 24 Jul 2025 14:13:00 +0100 Subject: [PATCH 018/109] Use off-heap Dataset when merging vector data --- x-pack/plugin/gpu/build.gradle | 1 + .../xpack/gpu/codec/DatasetUtils.java | 25 ++++++ .../xpack/gpu/codec/DatasetUtilsImpl.java | 27 +++++++ .../gpu/codec/GPUToHNSWVectorsWriter.java | 41 +++++++--- .../xpack/gpu/codec/DatasetUtilsImpl.java | 72 ++++++++++++++++++ 
.../xpack/gpu/codec/DatasetUtilsTests.java | 76 +++++++++++++++++++ 6 files changed, 230 insertions(+), 12 deletions(-) create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java create mode 100644 x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java create mode 100644 x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle index 48036e8dae137..1ab800cfac4e0 100644 --- a/x-pack/plugin/gpu/build.gradle +++ b/x-pack/plugin/gpu/build.gradle @@ -1,6 +1,7 @@ apply plugin: 'elasticsearch.internal-es-plugin' apply plugin: 'elasticsearch.internal-cluster-test' apply plugin: 'elasticsearch.internal-yaml-rest-test' +apply plugin: 'elasticsearch.mrjar' esplugin { name = 'gpu' diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java new file mode 100644 index 0000000000000..0f90ab4c5bb75 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.Dataset; + +import org.apache.lucene.store.MemorySegmentAccessInput; + +import java.io.IOException; + +public interface DatasetUtils { + + static DatasetUtils getInstance() { + return DatasetUtilsImpl.getInstance(); + } + + /** Returns a Dataset over the float32 vectors in the input. 
*/ + Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException; + +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java new file mode 100644 index 0000000000000..3d6d33028ab8f --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.Dataset; + +import org.apache.lucene.store.MemorySegmentAccessInput; + +import java.io.IOException; + +/** Stubb holder - never executed. */ +public class DatasetUtilsImpl implements DatasetUtils { + + static DatasetUtils getInstance() { + throw new UnsupportedOperationException("should not reach here"); + } + + @Override + public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException { + throw new UnsupportedOperationException("should not reach here"); + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java index 3e5a17d17505f..dde06ab3a7f5f 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -27,9 +27,11 @@ import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.FilterIndexInput; import 
org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.MemorySegmentAccessInput; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.hnsw.HnswGraph; import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator; @@ -175,12 +177,15 @@ private static final class DatasetOrVectors { private final Dataset dataset; private final float[][] vectors; - DatasetOrVectors(float[][] vectors) { - this( + static DatasetOrVectors fromArray(float[][] vectors) { + return new DatasetOrVectors( vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : Dataset.ofArray(vectors), vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? vectors : null ); - validateState(); + } + + static DatasetOrVectors fromDataset(Dataset dataset) { + return new DatasetOrVectors(dataset, null); } private DatasetOrVectors(Dataset dataset, float[][] vectors) { @@ -210,7 +215,7 @@ float[][] getVectors() { private void writeField(FieldWriter fieldWriter) throws IOException { float[][] vectors = fieldWriter.flatFieldVectorsWriter.getVectors().toArray(float[][]::new); - writeFieldInternal(fieldWriter.fieldInfo, new DatasetOrVectors(vectors)); + writeFieldInternal(fieldWriter.fieldInfo, DatasetOrVectors.fromArray(vectors)); } private void writeSortingField(FieldWriter fieldData, Sorter.DocMap sortMap) throws IOException { @@ -481,21 +486,33 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE } } try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { - // TODO: Improve this (not acceptable): pass tempRawVectorsFileName for the gpuIndex construction through MemorySegment - final FloatVectorValues floatVectorValues = getFloatVectorValues(fieldInfo, in, numVectors); - float[][] vectors = new float[numVectors][fieldInfo.getVectorDimension()]; - float[] vector; - for (int i = 0; i < numVectors; i++) { - vector = 
floatVectorValues.vectorValue(i); - System.arraycopy(vector, 0, vectors[i], 0, vector.length); + DatasetOrVectors datasetOrVectors; + + var input = FilterIndexInput.unwrapOnlyTest(in); + if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { + var ds = DatasetUtils.getInstance().fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension()); + datasetOrVectors = DatasetOrVectors.fromDataset(ds); + } else { + var fa = copyVectorsIntoArray(in, fieldInfo, numVectors); + datasetOrVectors = DatasetOrVectors.fromArray(fa); } - DatasetOrVectors datasetOrVectors = new DatasetOrVectors(vectors); writeFieldInternal(fieldInfo, datasetOrVectors); } finally { org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); } } + static float[][] copyVectorsIntoArray(IndexInput in, FieldInfo fieldInfo, int numVectors) throws IOException { + final FloatVectorValues floatVectorValues = getFloatVectorValues(fieldInfo, in, numVectors); + float[][] vectors = new float[numVectors][fieldInfo.getVectorDimension()]; + float[] vector; + for (int i = 0; i < numVectors; i++) { + vector = floatVectorValues.vectorValue(i); + System.arraycopy(vector, 0, vectors[i], 0, vector.length); + } + return vectors; + } + private static int writeFloatVectorValues(FieldInfo fieldInfo, IndexOutput out, FloatVectorValues floatVectorValues) throws IOException { int numVectors = 0; diff --git a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java new file mode 100644 index 0000000000000..0cda8bd1253e7 --- /dev/null +++ b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.Dataset; +import com.nvidia.cuvs.spi.CuVSProvider; + +import org.apache.lucene.store.MemorySegmentAccessInput; + +import java.io.IOException; +import java.lang.foreign.MemorySegment; +import java.lang.invoke.MethodHandle; + +public class DatasetUtilsImpl implements DatasetUtils { + + private static final DatasetUtils INSTANCE = new DatasetUtilsImpl(); + + private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeDatasetBuilder(); + + static DatasetUtils getInstance() { + return INSTANCE; + } + + static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dimensions) { + try { + return (Dataset) createDataset$mh.invokeExact(memorySegment, size, dimensions); + } catch (Throwable e) { + if (e instanceof Error err) { + throw err; + } else if (e instanceof RuntimeException re) { + throw re; + } else { + throw new RuntimeException(e); + } + } + } + + private DatasetUtilsImpl() {} + + @Override + public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException { + if (numVectors < 0 || dims < 0) { + throwIllegalArgumentException(numVectors, dims); + } + MemorySegment ms = input.segmentSliceOrNull(0L, input.length()); + assert ms != null; // TODO: this can be null if larger than 16GB or ... 
+ if (((long) numVectors * dims * Float.BYTES) < ms.byteSize()) { + throwIllegalArgumentException(ms, numVectors, dims); + } + return fromMemorySegment(ms, numVectors, dims); + } + + static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) { + var s = "segment of size [" + ms.byteSize() + "] too small for expected " + numVectors + " float vectors of " + dims + "dimensions"; + throw new IllegalArgumentException(s); + } + + static void throwIllegalArgumentException(int numVectors, int dims) { + String s; + if (numVectors < 0) { + s = "negative number of vectors:" + numVectors; + } else { + s = "negative vector dims:" + dims; + } + throw new IllegalArgumentException(s); + } +} diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java new file mode 100644 index 0000000000000..1e01e78fae0b1 --- /dev/null +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java @@ -0,0 +1,76 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.gpu.codec; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.MMapDirectory; +import org.apache.lucene.store.MemorySegmentAccessInput; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteOrder; + +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; + +public class DatasetUtilsTests extends ESTestCase { + + @Before + public void setup() { // TODO: abstract out setup in to common GPUTestcase + assumeTrue("cuvs runtime only supported on 22 or greater, your JDK is " + Runtime.version(), Runtime.version().feature() >= 22); + try (var resources = GPUVectorsFormat.cuVSResourcesOrNull()) { + assumeTrue("cuvs not supported", resources != null); + } + } + + static final ValueLayout.OfFloat JAVA_FLOAT_LE = ValueLayout.JAVA_FLOAT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + + final DatasetUtils datasetUtils = DatasetUtils.getInstance(); + + public void testBasic() throws Exception { + try (Directory dir = new MMapDirectory(createTempDir("testBasic"))) { + int numVecs = randomIntBetween(1, 100); + int dims = randomIntBetween(128, 2049); + + try (var out = dir.createOutput("vector.data", IOContext.DEFAULT)) { + var ba = new byte[dims * Float.BYTES]; + var seg = MemorySegment.ofArray(ba); + for (int v = 0; v < numVecs; v++) { + var src = MemorySegment.ofArray(randomVector(dims)); + MemorySegment.copy(src, JAVA_FLOAT_UNALIGNED, 0L, seg, JAVA_FLOAT_LE, 0L, numVecs); + out.writeBytes(ba, 0, ba.length); + } + } + try ( + var in = dir.openInput("vector.data", IOContext.DEFAULT); + var dataset = datasetUtils.fromInput((MemorySegmentAccessInput) in, numVecs, dims) + ) { + assertEquals(numVecs, dataset.size()); + assertEquals(dims, dataset.dimensions()); + } + } + } + + static final Class IAE = IllegalArgumentException.class; + + public void 
testIllegal() { + MemorySegmentAccessInput in = null; // TODO: make this non-null + expectThrows(IAE, () -> datasetUtils.fromInput(in, -1, 1)); + expectThrows(IAE, () -> datasetUtils.fromInput(in, 1, -1)); + } + + float[] randomVector(int dims) { + float[] fa = new float[dims]; + for (int i = 0; i < dims; ++i) { + fa[i] = random().nextFloat(); + } + return fa; + } +} From e90fcd7cd0821be2ca46d54b0e7b175e2d083672 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 24 Jul 2025 13:49:29 -0400 Subject: [PATCH 019/109] Plugins can provide VectorsFormatProvider Plugins can provide VectorsFormatProvider that provides new KnnVectorsFormat for different VectorIndexTypes. If there formats provided by plugins they are used instead of standard --- .../index/codec/PerFieldFormatSupplier.java | 2 +- .../index/mapper/DocumentParser.java | 3 +- .../index/mapper/DocumentParserContext.java | 5 +++ .../index/mapper/MapperRegistry.java | 11 ++++- .../index/mapper/MapperService.java | 3 +- .../index/mapper/MappingParserContext.java | 17 ++++++-- .../vectors/DenseVectorFieldMapper.java | 41 ++++++++++++++----- .../mapper/vectors/VectorsFormatProvider.java | 29 +++++++++++++ .../elasticsearch/indices/IndicesModule.java | 16 +++++++- .../elasticsearch/plugins/MapperPlugin.java | 8 ++++ .../metadata/IndexMetadataVerifierTests.java | 8 +++- .../index/IndexSettingsTests.java | 8 +++- .../elasticsearch/index/codec/CodecTests.java | 3 +- .../index/mapper/MappingParserTests.java | 3 +- .../index/mapper/ParametrizedMapperTests.java | 3 +- .../index/mapper/TypeParsersTests.java | 3 +- .../vectors/DenseVectorFieldMapperTests.java | 2 +- .../query/SearchExecutionContextTests.java | 3 +- .../mapper/TestDocumentParserContext.java | 3 +- .../aggregations/AggregatorTestCase.java | 3 +- .../elasticsearch/xpack/gpu/GPUPlugin.java | 17 ++++++++ .../xpack/gpu/codec/GPUVectorsFormat.java | 26 +++++++----- .../xpack/gpu/codec/DatasetUtilsTests.java | 2 +- .../codec/GPUDenseVectorFieldMapperTests.java | 
3 +- .../gpu/codec/GPUVectorsFormatTests.java | 2 +- .../mapper/SemanticTextFieldMapper.java | 3 +- 26 files changed, 183 insertions(+), 44 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java index ecb0d6d5eb3ca..8d193d6aff585 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java @@ -98,7 +98,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { if (mapperService != null) { Mapper mapper = mapperService.mappingLookup().getMapper(field); if (mapper instanceof DenseVectorFieldMapper vectorMapper) { - return vectorMapper.getKnnVectorsFormatForField(knnVectorsFormat); + return vectorMapper.getKnnVectorsFormatForField(knnVectorsFormat, mapperService.getIndexSettings()); } } return knnVectorsFormat; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index 15e7ff88350b6..b196519551d8f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -806,7 +806,8 @@ private static void postProcessDynamicArrayMapping(DocumentParserContext context DenseVectorFieldMapper.Builder builder = new DenseVectorFieldMapper.Builder( fieldName, context.indexSettings().getIndexVersionCreated(), - IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(context.indexSettings().getSettings()) + IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(context.indexSettings().getSettings()), + context.getVectorFormatProviers() ); builder.dimensions(mappers.size()); DenseVectorFieldMapper 
denseVectorFieldMapper = builder.build(builderContext); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index b77c0426c23d4..1bf7480903923 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -19,6 +19,7 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.mapper.MapperService.MergeReason; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.xcontent.FilterXContentParserWrapper; import org.elasticsearch.xcontent.FlatteningXContentParser; import org.elasticsearch.xcontent.XContentBuilder; @@ -299,6 +300,10 @@ public final MetadataFieldMapper getMetadataMapper(String mapperName) { return mappingLookup.getMapping().getMetadataMapperByName(mapperName); } + public final List getVectorFormatProviers() { + return mappingParserContext.getVectorsFormatProviders(); + } + public final MappingParserContext dynamicTemplateParserContext(DateFormatter dateFormatter) { return mappingParserContext.createDynamicTemplateContext(dateFormatter); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java index 44f7def74ec0e..53fa532a78278 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java @@ -11,11 +11,13 @@ import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.plugins.FieldPredicate; import org.elasticsearch.plugins.MapperPlugin; import java.util.Collections; import 
java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.function.Function; @@ -31,16 +33,19 @@ public final class MapperRegistry { private final Map metadataMapperParsers6x; private final Map metadataMapperParsers5x; private final Function fieldFilter; + private final List vectorsFormatProviders; public MapperRegistry( Map mapperParsers, Map runtimeFieldParsers, Map metadataMapperParsers, - Function fieldFilter + Function fieldFilter, + List vectorsFormatProviders ) { this.mapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(mapperParsers)); this.runtimeFieldParsers = runtimeFieldParsers; this.metadataMapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(metadataMapperParsers)); + this.vectorsFormatProviders = vectorsFormatProviders; Map metadata7x = new LinkedHashMap<>(metadataMapperParsers); metadata7x.remove(NestedPathFieldMapper.NAME); this.metadataMapperParsers7x = metadata7x; @@ -72,6 +77,10 @@ public Map getRuntimeFieldParsers() { return runtimeFieldParsers; } + public List getVectorsFormatProviders() { + return vectorsFormatProviders; + } + /** * Return a map of the meta mappers that have been registered. The * returned map uses the name of the field as a key. 
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java index 7958fd8e51525..482dc53066586 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -245,7 +245,8 @@ public MapperService( indexAnalyzers, indexSettings, idFieldMapper, - bitSetProducer + bitSetProducer, + mapperRegistry.getVectorsFormatProviders() ); this.documentParser = new DocumentParser(parserConfiguration, this.mappingParserContextSupplier.get()); Map metadataMapperParsers = mapperRegistry.getMetadataMapperParsers( diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java index f74a257f32921..50c01a0b99b0a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java @@ -17,10 +17,12 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.script.ScriptCompiler; +import java.util.List; import java.util.function.Function; import java.util.function.Supplier; @@ -41,6 +43,7 @@ public class MappingParserContext { private final IndexSettings indexSettings; private final IdFieldMapper idFieldMapper; private final Function bitSetProducer; + private final List vectorsFormatProviders; private final long mappingObjectDepthLimit; private long mappingObjectDepth = 0; @@ -55,7 +58,8 @@ public MappingParserContext( IndexAnalyzers indexAnalyzers, IndexSettings indexSettings, 
IdFieldMapper idFieldMapper, - Function bitSetProducer + Function bitSetProducer, + List vectorsFormatProviders ) { this.similarityLookupService = similarityLookupService; this.typeParsers = typeParsers; @@ -69,6 +73,7 @@ public MappingParserContext( this.idFieldMapper = idFieldMapper; this.mappingObjectDepthLimit = indexSettings.getMappingDepthLimit(); this.bitSetProducer = bitSetProducer; + this.vectorsFormatProviders = vectorsFormatProviders; } public IndexAnalyzers getIndexAnalyzers() { @@ -142,6 +147,10 @@ public BitSetProducer bitSetProducer(Query query) { return bitSetProducer.apply(query); } + public List getVectorsFormatProviders() { + return vectorsFormatProviders; + } + void incrementMappingObjectDepth() throws MapperParsingException { mappingObjectDepth++; if (mappingObjectDepth > mappingObjectDepthLimit) { @@ -170,7 +179,8 @@ private static class MultiFieldParserContext extends MappingParserContext { in.indexAnalyzers, in.indexSettings, in.idFieldMapper, - in.bitSetProducer + in.bitSetProducer, + in.vectorsFormatProviders ); } @@ -200,7 +210,8 @@ private static class DynamicTemplateParserContext extends MappingParserContext { in.indexAnalyzers, in.indexSettings, in.idFieldMapper, - in.bitSetProducer + in.bitSetProducer, + in.vectorsFormatProviders ); this.dateFormatter = dateFormatter; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index c71c66aaf37a1..49c74c08448d2 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -48,6 +48,7 @@ import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.IndexSettings; import 
org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.codec.vectors.ES813FlatVectorFormat; @@ -121,9 +122,6 @@ import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MIN_VECTORS_PER_CLUSTER; -/** - * A {@link FieldMapper} for indexing a dense vector of floats. - */ public class DenseVectorFieldMapper extends FieldMapper { public static final String COSINE_MAGNITUDE_FIELD_SUFFIX = "._magnitude"; private static final float EPS = 1e-3f; @@ -256,8 +254,14 @@ public static class Builder extends FieldMapper.Builder { final IndexVersion indexVersionCreated; final boolean isSyntheticVector; + private final List vectorsFormatProviders; - public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheticVector) { + public Builder( + String name, + IndexVersion indexVersionCreated, + boolean isSyntheticVector, + List vectorsFormatProviders + ) { super(name); this.indexVersionCreated = indexVersionCreated; // This is defined as updatable because it can be updated once, from [null] to a valid dim size, @@ -290,6 +294,7 @@ public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheti } }); this.isSyntheticVector = isSyntheticVector; + this.vectorsFormatProviders = vectorsFormatProviders; final boolean indexedByDefault = indexVersionCreated.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION); final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW); final boolean defaultBBQ8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW); @@ -427,6 +432,7 @@ public Builder indexOptions(DenseVectorIndexOptions indexOptions) { } @Override + public DenseVectorFieldMapper build(MapperBuilderContext context) { // Validate again here because the dimensions or element type could have been set programmatically, // which 
affects index option validity @@ -448,7 +454,8 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { builderParams(this, context), indexOptions.getValue(), indexVersionCreated, - isSyntheticVectorFinal + isSyntheticVectorFinal, + vectorsFormatProviders ); } } @@ -2382,7 +2389,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws (n, c) -> new Builder( n, c.getIndexSettings().getIndexVersionCreated(), - INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) + INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings()), + c.getVectorsFormatProviders() ), notInMultiFields(CONTENT_TYPE) ); @@ -2841,6 +2849,7 @@ public List fetchValues(Source source, int doc, List ignoredValu private final DenseVectorIndexOptions indexOptions; private final IndexVersion indexCreatedVersion; private final boolean isSyntheticVector; + private final List extraVectorsFormatProviders; private DenseVectorFieldMapper( String simpleName, @@ -2848,12 +2857,14 @@ private DenseVectorFieldMapper( BuilderParams params, DenseVectorIndexOptions indexOptions, IndexVersion indexCreatedVersion, - boolean isSyntheticVector + boolean isSyntheticVector, + List vectorsFormatProviders ) { super(simpleName, mappedFieldType, params); this.indexOptions = indexOptions; this.indexCreatedVersion = indexCreatedVersion; this.isSyntheticVector = isSyntheticVector; + this.extraVectorsFormatProviders = vectorsFormatProviders; } @Override @@ -2975,7 +2986,7 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, isSyntheticVector).init(this); + return new Builder(leafName(), indexCreatedVersion, isSyntheticVector, extraVectorsFormatProviders).init(this); } private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) { @@ -2998,12 +3009,22 @@ private static 
DenseVectorIndexOptions parseIndexOptions(String fieldName, Objec * @return the custom kNN vectors format that is configured for this field or * {@code null} if the default format should be used. */ - public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultFormat) { + public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultFormat, IndexSettings indexSettings) { final KnnVectorsFormat format; if (indexOptions == null) { format = fieldType().elementType == ElementType.BIT ? new ES815HnswBitVectorsFormat() : defaultFormat; } else { - format = indexOptions.getVectorsFormat(fieldType().elementType); + // if plugins provided alternative KnnVectorsFormat for this indexOptions, use it instead of standard + List extraKnnFormats = new ArrayList<>(); + for (VectorsFormatProvider vectorsFormatProvider : extraVectorsFormatProviders) { + KnnVectorsFormat extraKnnFormat = vectorsFormatProvider.getKnnVectorsFormat(indexSettings, indexOptions); + extraKnnFormats.add(extraKnnFormat); + } + if (extraKnnFormats.size() > 0) { + format = extraKnnFormats.get(0); + } else { + format = indexOptions.getVectorsFormat(fieldType().elementType); + } } // It's legal to reuse the same format name as this is the same on-disk format. return new KnnVectorsFormat(format.getName()) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java new file mode 100644 index 0000000000000..f3648cea7f2fe --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.vectors; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.elasticsearch.index.IndexSettings; + +/** + * A service provider interface for obtaining Lucene {@link KnnVectorsFormat} instances. + * Plugins can implement this interface to provide custom vector formats + */ +public interface VectorsFormatProvider { + + /** + * Returns a {@link KnnVectorsFormat} instance based on the provided index settings and vector index options. + * + * @param indexSettings The index settings. + * @param indexOptions The dense vector index options. + * @return A KnnVectorsFormat instance. + */ + KnnVectorsFormat getKnnVectorsFormat(IndexSettings indexSettings, DenseVectorFieldMapper.DenseVectorIndexOptions indexOptions); +} diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java index 09be98630d5c4..e2854ed7c3426 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -68,6 +68,7 @@ import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.index.seqno.RetentionLeaseBackgroundSyncAction; import org.elasticsearch.index.seqno.RetentionLeaseSyncAction; import org.elasticsearch.index.seqno.RetentionLeaseSyncer; @@ -80,6 +81,7 @@ import 
org.elasticsearch.xcontent.NamedXContentRegistry; import org.elasticsearch.xcontent.ParseField; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.LinkedHashMap; @@ -99,7 +101,8 @@ public IndicesModule(List mapperPlugins) { getMappers(mapperPlugins), getRuntimeFields(mapperPlugins), getMetadataMappers(mapperPlugins), - getFieldFilter(mapperPlugins) + getFieldFilter(mapperPlugins), + getVectorFormatProviders(mapperPlugins) ); } @@ -221,6 +224,17 @@ public static Map getMappers(List mappe return Collections.unmodifiableMap(mappers); } + private static List getVectorFormatProviders(List mapperPlugins) { + List vectorsFormatProviders = new ArrayList<>(); + for (MapperPlugin mapperPlugin : mapperPlugins) { + VectorsFormatProvider vectorsFormatProvider = mapperPlugin.getVectorsFormatProvider(); + if (vectorsFormatProvider != null) { + vectorsFormatProviders.add(vectorsFormatProvider); + } + } + return Collections.unmodifiableList(vectorsFormatProviders); + } + private static Map getRuntimeFields(List mapperPlugins) { Map runtimeParsers = new LinkedHashMap<>(); runtimeParsers.put(BooleanFieldMapper.CONTENT_TYPE, BooleanScriptFieldType.PARSER); diff --git a/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java b/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java index c82bc286a90c8..9f27427252d90 100644 --- a/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java +++ b/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java @@ -12,6 +12,7 @@ import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MetadataFieldMapper; import org.elasticsearch.index.mapper.RuntimeField; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import java.util.Collections; import java.util.Map; @@ -65,6 +66,13 @@ default Function getFieldFilter() { return NOOP_FIELD_FILTER; } + /** + * Returns {VectorFormatProvider} implementations added by this plugin. 
    /**
     * Returns the {@link VectorsFormatProvider} added by this plugin.
     * The default implementation returns {@code null}, meaning the plugin provides no custom vector formats.
     */
    default VectorsFormatProvider getVectorsFormatProvider() {
        return null;
    }
a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java index 23ee616d54231..3ea71b4432928 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -128,7 +128,8 @@ private CodecService createCodecService() throws IOException { Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), - MapperPlugin.NOOP_FIELD_FILTER + MapperPlugin.NOOP_FIELD_FILTER, + null ); BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(settings, BitsetFilterCache.Listener.NOOP); MapperService service = new MapperService( diff --git a/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java index 4b674cf1985b2..a7383a7ced3b3 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java @@ -58,7 +58,8 @@ private static MappingParser createMappingParser(Settings settings, IndexVersion indexAnalyzers, indexSettings, indexSettings.getMode().idFieldMapperWithoutFieldData(), - bitsetFilterCache::getBitSetProducer + bitsetFilterCache::getBitSetProducer, + null ); Map metadataMapperParsers = mapperRegistry.getMetadataMapperParsers( diff --git a/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java index a161bcbc5d6d2..154f7d774bc9a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java @@ -272,7 +272,8 @@ private static TestMapper fromMapping( mapperService.getIndexSettings().getMode().idFieldMapperWithoutFieldData(), query -> { throw new 
UnsupportedOperationException(); - } + }, + null ); if (fromDynamicTemplate) { pc = pc.createDynamicTemplateContext(null); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java index 9a30e7d696b68..5b0f823ac1e17 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java @@ -106,7 +106,8 @@ public void testMultiFieldWithinMultiField() throws IOException { ProvidedIdFieldMapper.NO_FIELD_DATA, query -> { throw new UnsupportedOperationException(); - } + }, + null ); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index bb7de99b72249..2802d94af5fad 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -2074,7 +2074,7 @@ public void testValidateOnBuild() { int dimensions = randomIntBetween(64, 1024); // Build a dense vector field mapper with float element type, which will trigger int8 HNSW index options - DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current(), false).elementType( + DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current(), false, null).elementType( ElementType.FLOAT ).dimensions(dimensions).build(context); diff --git a/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java b/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java index 0c31ab703862f..9474b49df903d 100644 --- 
a/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java @@ -552,7 +552,8 @@ private static MapperService createMapperService(IndexSettings indexSettings, Ma indexSettings.getMode().buildIdFieldMapper(() -> true), query -> { throw new UnsupportedOperationException(); - } + }, + null ) ); when(mapperService.isMultiField(anyString())).then( diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java index 49fe9d30239ae..1aef74bc5e735 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java @@ -67,7 +67,8 @@ private TestDocumentParserContext(MappingLookup mappingLookup, SourceToParse sou null, query -> { throw new UnsupportedOperationException(); - } + }, + null ), source, mappingLookup.getMapping().getRoot(), diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 5e451e2e79f10..47ceedb8b686b 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -1411,7 +1411,8 @@ private static class MockParserContext extends MappingParserContext { null, query -> { throw new UnsupportedOperationException(); - } + }, + null ); } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index 483d4e623c008..ea38a0a1a9b3f 100644 --- 
a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -7,10 +7,27 @@ package org.elasticsearch.xpack.gpu; import org.elasticsearch.common.util.FeatureFlag; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; public class GPUPlugin extends Plugin implements MapperPlugin { public static final FeatureFlag GPU_FORMAT = new FeatureFlag("gpu_format"); + + @Override + public VectorsFormatProvider getVectorsFormatProvider() { + // TODO check indexSettings if it allows for GPU indexing + return (indexSettings, indexOptions) -> { + if (GPU_FORMAT.isEnabled() + && GPUVectorsFormat.cuVSResourcesOrNull(false) != null + && indexOptions.getType() == DenseVectorFieldMapper.VectorIndexType.HNSW) { + return new GPUVectorsFormat(); + } else { + return null; + } + }; + } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java index c54d76f880678..abc2425113a41 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java @@ -54,7 +54,7 @@ public GPUVectorsFormat() { @Override public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - CuVSResources cuVSResources = cuVSResourcesOrNull(); + CuVSResources cuVSResources = cuVSResourcesOrNull(true); if (cuVSResources == null) { throw new IllegalArgumentException("GPU based vector search is not supported on this platform or java version"); } @@ -83,23 +83,27 @@ public String toString() { } /** Tells whether the 
platform supports cuvs. */ - public static CuVSResources cuVSResourcesOrNull() { + public static CuVSResources cuVSResourcesOrNull(boolean logError) { try { var resources = CuVSResources.create(); return resources; } catch (UnsupportedOperationException uoe) { - String msg = ""; - if (uoe.getMessage() == null) { - msg = "Runtime Java version: " + Runtime.version().feature(); - } else { - msg = ": " + uoe.getMessage(); + if (logError) { + String msg = ""; + if (uoe.getMessage() == null) { + msg = "Runtime Java version: " + Runtime.version().feature(); + } else { + msg = ": " + uoe.getMessage(); + } + LOG.warn("GPU based vector search is not supported on this platform or java version; " + msg); } - LOG.warn("GPU based vector search is not supported on this platform or java version; " + msg); } catch (Throwable t) { - if (t instanceof ExceptionInInitializerError ex) { - t = ex.getCause(); + if (logError) { + if (t instanceof ExceptionInInitializerError ex) { + t = ex.getCause(); + } + LOG.warn("Exception occurred during creation of cuvs resources. " + t); } - LOG.warn("Exception occurred during creation of cuvs resources. 
" + t); } return null; } diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java index 1e01e78fae0b1..d9affefabcd40 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java @@ -25,7 +25,7 @@ public class DatasetUtilsTests extends ESTestCase { @Before public void setup() { // TODO: abstract out setup in to common GPUTestcase assumeTrue("cuvs runtime only supported on 22 or greater, your JDK is " + Runtime.version(), Runtime.version().feature() >= 22); - try (var resources = GPUVectorsFormat.cuVSResourcesOrNull()) { + try (var resources = GPUVectorsFormat.cuVSResourcesOrNull(false)) { assumeTrue("cuvs not supported", resources != null); } } diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java index c2163cb4f0f68..fd8db09461e39 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java @@ -23,14 +23,13 @@ import java.util.Collection; import java.util.Collections; -import static org.elasticsearch.xpack.gpu.GPUPlugin.GPU_FORMAT; import static org.hamcrest.Matchers.instanceOf; public class GPUDenseVectorFieldMapperTests extends AbstractDenseVectorFieldMapperTestcase { @Before public void setup() { - assumeTrue("feature flag [gpu_format] must be enabled", GPU_FORMAT.isEnabled()); + assumeTrue("cuvs not supported", GPUVectorsFormat.cuVSResourcesOrNull(false) != null); } @Override diff --git 
a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java index 34b99da255bc7..99a823277506b 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java @@ -25,7 +25,7 @@ public class GPUVectorsFormatTests extends BaseKnnVectorsFormatTestCase { @BeforeClass public static void beforeClass() { - assumeTrue("cuvs not supported", GPUVectorsFormat.cuVSResourcesOrNull() != null); + assumeTrue("cuvs not supported", GPUVectorsFormat.cuVSResourcesOrNull(false) != null); } static final Codec codec = TestUtil.alwaysKnnVectorsFormat(new GPUVectorsFormat()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 9972fa9e5ae0b..ce21f8ae3b2c3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -1176,7 +1176,8 @@ private static Mapper.Builder createEmbeddingsField( DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder( CHUNKED_EMBEDDINGS_FIELD, indexVersionCreated, - false + false, + null ); SimilarityMeasure similarity = modelSettings.similarity(); From 9f2c96f6963207b684a106e1f43bb01487d2d68a Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 25 Jul 2025 14:35:14 -0400 Subject: [PATCH 020/109] Fix bugs --- .../mapper/vectors/DenseVectorFieldMapper.java | 4 +++- .../mapper/vectors/VectorsFormatProvider.java | 1 + .../xpack/gpu/codec/GPUToHNSWVectorsWriter.java | 17 ++++------------- 
.../xpack/gpu/codec/DatasetUtilsImpl.java | 8 ++++---- 4 files changed, 12 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 49c74c08448d2..8df7358046dd1 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -3018,7 +3018,9 @@ public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultForm List extraKnnFormats = new ArrayList<>(); for (VectorsFormatProvider vectorsFormatProvider : extraVectorsFormatProviders) { KnnVectorsFormat extraKnnFormat = vectorsFormatProvider.getKnnVectorsFormat(indexSettings, indexOptions); - extraKnnFormats.add(extraKnnFormat); + if (extraKnnFormat != null) { + extraKnnFormats.add(extraKnnFormat); + } } if (extraKnnFormats.size() > 0) { format = extraKnnFormats.get(0); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java index f3648cea7f2fe..4bc338e6680ec 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java @@ -20,6 +20,7 @@ public interface VectorsFormatProvider { /** * Returns a {@link KnnVectorsFormat} instance based on the provided index settings and vector index options. + * May return {@code null} if the provider does not support the format for the given index settings or vector index options. * * @param indexSettings The index settings. * @param indexOptions The dense vector index options. 
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java index dde06ab3a7f5f..cf47d15d83ea5 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -43,7 +43,6 @@ import org.elasticsearch.logging.Logger; import java.io.IOException; -import java.io.UncheckedIOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.ArrayList; @@ -470,7 +469,6 @@ public NodesIterator getNodesOnLevel(int level) { @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { flatVectorWriter.mergeOneField(fieldInfo, mergeState); - FloatVectorValues vectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); // save merged vector values to a temp file final int numVectors; String tempRawVectorsFileName = null; @@ -487,9 +485,8 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE } try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { DatasetOrVectors datasetOrVectors; - var input = FilterIndexInput.unwrapOnlyTest(in); - if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { + if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput && numVectors >= MIN_NUM_VECTORS_FOR_GPU_BUILD) { var ds = DatasetUtils.getInstance().fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension()); datasetOrVectors = DatasetOrVectors.fromDataset(ds); } else { @@ -521,7 +518,6 @@ private static int writeFloatVectorValues(FieldInfo fieldInfo, IndexOutput out, for (int docV = iterator.nextDoc(); 
docV != NO_MORE_DOCS; docV = iterator.nextDoc()) { numVectors++; float[] vector = floatVectorValues.vectorValue(iterator.index()); - out.writeInt(iterator.docID()); buffer.asFloatBuffer().put(vector); out.writeBytes(buffer.array(), buffer.array().length); } @@ -532,12 +528,12 @@ private static FloatVectorValues getFloatVectorValues(FieldInfo fieldInfo, Index if (numVectors == 0) { return FloatVectorValues.fromFloats(List.of(), fieldInfo.getVectorDimension()); } - final long length = (long) Float.BYTES * fieldInfo.getVectorDimension() + Integer.BYTES; + final long length = (long) Float.BYTES * fieldInfo.getVectorDimension(); final float[] vector = new float[fieldInfo.getVectorDimension()]; return new FloatVectorValues() { @Override public float[] vectorValue(int ord) throws IOException { - randomAccessInput.seek(ord * length + Integer.BYTES); + randomAccessInput.seek(ord * length); randomAccessInput.readFloats(vector, 0, vector.length); return vector; } @@ -559,12 +555,7 @@ public int size() { @Override public int ordToDoc(int ord) { - try { - randomAccessInput.seek(ord * length); - return randomAccessInput.readInt(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } + throw new UnsupportedOperationException("Not implemented"); } }; } diff --git a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java index 0cda8bd1253e7..998154e1ac303 100644 --- a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java +++ b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java @@ -49,23 +49,23 @@ public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dim } MemorySegment ms = input.segmentSliceOrNull(0L, input.length()); assert ms != null; // TODO: this can be null if larger than 16GB or ... 
- if (((long) numVectors * dims * Float.BYTES) < ms.byteSize()) { + if (((long) numVectors * dims * Float.BYTES) > ms.byteSize()) { throwIllegalArgumentException(ms, numVectors, dims); } return fromMemorySegment(ms, numVectors, dims); } static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) { - var s = "segment of size [" + ms.byteSize() + "] too small for expected " + numVectors + " float vectors of " + dims + "dimensions"; + var s = "segment of size [" + ms.byteSize() + "] too small for expected " + numVectors + " float vectors of " + dims + " dims"; throw new IllegalArgumentException(s); } static void throwIllegalArgumentException(int numVectors, int dims) { String s; if (numVectors < 0) { - s = "negative number of vectors:" + numVectors; + s = "negative number of vectors: " + numVectors; } else { - s = "negative vector dims:" + dims; + s = "negative vector dims: " + dims; } throw new IllegalArgumentException(s); } From 4390d558ae42a7efc52bf39be9c756edb13370d6 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Mon, 28 Jul 2025 14:52:55 -0400 Subject: [PATCH 021/109] Introduce an index level setting: index.vectors.indexing.use_gpu index.vectors.indexing.use_gpu has 3 options: - auto (null) default: use gpu indexing when available - false: don't use gpu indexing - true: use gpu indexing and if not available, throw an error --- .../common/settings/IndexScopedSettings.java | 1 + .../elasticsearch/index/IndexSettings.java | 49 +++++++++++++++++++ .../index/IndexSettingsTests.java | 22 +++++++++ .../elasticsearch/xpack/gpu/GPUPlugin.java | 27 +++++++--- .../xpack/gpu/codec/GPUVectorsFormat.java | 4 +- 5 files changed, 93 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 9f4c5b80ccf23..6dd6a803f0cc4 100644 --- 
a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -246,6 +246,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { if (SYNTHETIC_VECTORS) { settings.add(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING); } + settings.add(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING); BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings); }; diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index cd78d4323f44b..c0628832f0439 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -65,6 +65,7 @@ * be called for each settings update. */ public final class IndexSettings { + public static final Setting> DEFAULT_FIELD_SETTING = Setting.stringListSetting( "index.query.default_field", Collections.singletonList("*"), @@ -856,6 +857,32 @@ private static String getIgnoreAboveDefaultValue(final Settings settings) { Property.Final ); + /** + * An enum for the tri-state value of the `index.vectors.indexing.use_gpu` setting. + */ + public enum GpuMode { + TRUE, + FALSE, + AUTO + } + + /** + * Setting to control whether to use GPU for vectors indexing. + * Currently only applicable for index_options.type: hnsw. + * + * If unset or "auto", an automatic decision is made based on the presence of GPU, necessary libraries, vectors' count and dims. + * If set to true, GPU must be used for vectors indexing, and if GPU or necessary libraries are not available, + * an exception will be thrown. + * If set to false, GPU will not be used for vectors indexing. 
+ */ + public static final Setting VECTORS_INDEXING_USE_GPU_SETTING = Setting.enumSetting( + GpuMode.class, + "index.vectors.indexing.use_gpu", + GpuMode.AUTO, + Property.IndexScope, + Property.Dynamic + ); + private final Index index; private final IndexVersion version; private final Logger logger; @@ -954,6 +981,8 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) { */ private volatile int maxSlicesPerScroll; + private volatile GpuMode useGpuForVectorsIndexing; + /** * The maximum length of regex string allowed in a regexp query. */ @@ -1129,6 +1158,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti && scopedSettings.get(RECOVERY_USE_SYNTHETIC_SOURCE_SETTING); useDocValuesSkipper = DOC_VALUES_SKIPPER && scopedSettings.get(USE_DOC_VALUES_SKIPPER); seqNoIndexOptions = scopedSettings.get(SEQ_NO_INDEX_OPTIONS_SETTING); + this.useGpuForVectorsIndexing = scopedSettings.get(VECTORS_INDEXING_USE_GPU_SETTING); if (recoverySourceSyntheticEnabled) { if (DiscoveryNode.isStateless(settings)) { throw new IllegalArgumentException("synthetic recovery source is only allowed in stateful"); @@ -1151,6 +1181,8 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti } } + scopedSettings.addSettingsUpdateConsumer(VECTORS_INDEXING_USE_GPU_SETTING, this::setUseGpuForVectorsIndexing); + scopedSettings.addSettingsUpdateConsumer( MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING, mergePolicyConfig::setCompoundFormatThreshold @@ -1877,6 +1909,23 @@ private void setHnswEarlyTermination(boolean earlyTermination) { this.earlyTermination = earlyTermination; } + private void setUseGpuForVectorsIndexing(GpuMode useGpuForVectorsIndexing) { + this.useGpuForVectorsIndexing = useGpuForVectorsIndexing; + } + + /** + * Whether to use GPU for vectors indexing. 
+ * Currently only applicable for index_options.type: hnsw + * + * @return GpuMode.TRUE if GPU must be used for vectors indexing; + * GpuMode.FALSE if GPU will not be used, or + * GpuMode.AUTO if the setting is not set, + * meaning automatic decision is maded on the presence of GPU, libraries, vectors' count and dims. + */ + public GpuMode useGpuForVectorsIndexing() { + return useGpuForVectorsIndexing; + } + public SeqNoFieldMapper.SeqNoIndexOptions seqNoIndexOptions() { return seqNoIndexOptions; } diff --git a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java index bc8ec5b9bbeaf..8c81763e195cc 100644 --- a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java +++ b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java @@ -921,4 +921,26 @@ public void testSame() { } assertTrue(IndexSettings.same(settings, differentOtherSettingBuilder.build())); } + + public void testVectorsUseGpuSetting() { + IndexMetadata metadata = newIndexMeta( + "index", + Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()).build() + ); + IndexSettings settings = new IndexSettings(metadata, Settings.EMPTY); + assertEquals(IndexSettings.GpuMode.AUTO, settings.useGpuForVectorsIndexing()); + + settings.updateIndexMetadata( + newIndexMeta("index", Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), true).build()) + ); + assertEquals(IndexSettings.GpuMode.TRUE, settings.useGpuForVectorsIndexing()); + + settings.updateIndexMetadata( + newIndexMeta("index", Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), false).build()) + ); + assertEquals(IndexSettings.GpuMode.FALSE, settings.useGpuForVectorsIndexing()); + + settings.updateIndexMetadata(newIndexMeta("index", Settings.EMPTY)); + assertEquals(IndexSettings.GpuMode.AUTO, settings.useGpuForVectorsIndexing()); + } } diff --git 
a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index ea38a0a1a9b3f..bc0a49d1429ca 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -6,8 +6,10 @@ */ package org.elasticsearch.xpack.gpu; +import com.nvidia.cuvs.CuVSResources; + import org.elasticsearch.common.util.FeatureFlag; -import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; @@ -19,15 +21,24 @@ public class GPUPlugin extends Plugin implements MapperPlugin { @Override public VectorsFormatProvider getVectorsFormatProvider() { - // TODO check indexSettings if it allows for GPU indexing return (indexSettings, indexOptions) -> { - if (GPU_FORMAT.isEnabled() - && GPUVectorsFormat.cuVSResourcesOrNull(false) != null - && indexOptions.getType() == DenseVectorFieldMapper.VectorIndexType.HNSW) { - return new GPUVectorsFormat(); - } else { - return null; + if (GPU_FORMAT.isEnabled()) { + IndexSettings.GpuMode gpuMode = indexSettings.getValue(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING); + if (gpuMode == IndexSettings.GpuMode.TRUE) { + CuVSResources resources = GPUVectorsFormat.cuVSResourcesOrNull(true); + if (resources == null) { + throw new IllegalArgumentException( + "[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node." 
+ ); + } else { + return new GPUVectorsFormat(); + } + } + if ((gpuMode == IndexSettings.GpuMode.AUTO) && GPUVectorsFormat.cuVSResourcesOrNull(false) != null) { + return new GPUVectorsFormat(); + } } + return null; }; } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java index abc2425113a41..c87b5538e2a8b 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java @@ -56,7 +56,7 @@ public GPUVectorsFormat() { public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { CuVSResources cuVSResources = cuVSResourcesOrNull(true); if (cuVSResources == null) { - throw new IllegalArgumentException("GPU based vector search is not supported on this platform or java version"); + throw new IllegalArgumentException("GPU based vector indexing is not supported on this platform or java version"); } return new GPUToHNSWVectorsWriter( cuVSResources, @@ -95,7 +95,7 @@ public static CuVSResources cuVSResourcesOrNull(boolean logError) { } else { msg = ": " + uoe.getMessage(); } - LOG.warn("GPU based vector search is not supported on this platform or java version; " + msg); + LOG.warn("GPU based vector indexing is not supported on this platform or java version; " + msg); } } catch (Throwable t) { if (logError) { From 8f22e28e2b35d5d8aa433fdb0350471d5e8d9ae9 Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Tue, 29 Jul 2025 11:15:15 +0100 Subject: [PATCH 022/109] Fix initialization of DatasetUtilTests to avoid instance check if unsupported --- .../org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java 
b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java index d9affefabcd40..e86270d73695b 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java @@ -22,18 +22,19 @@ public class DatasetUtilsTests extends ESTestCase { + DatasetUtils datasetUtils; + @Before public void setup() { // TODO: abstract out setup in to common GPUTestcase assumeTrue("cuvs runtime only supported on 22 or greater, your JDK is " + Runtime.version(), Runtime.version().feature() >= 22); try (var resources = GPUVectorsFormat.cuVSResourcesOrNull(false)) { assumeTrue("cuvs not supported", resources != null); } + datasetUtils = DatasetUtils.getInstance(); } static final ValueLayout.OfFloat JAVA_FLOAT_LE = ValueLayout.JAVA_FLOAT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); - final DatasetUtils datasetUtils = DatasetUtils.getInstance(); - public void testBasic() throws Exception { try (Directory dir = new MMapDirectory(createTempDir("testBasic"))) { int numVecs = randomIntBetween(1, 100); From 5386fb80ed6107cb31c45071ee44bf32b033ea15 Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Tue, 29 Jul 2025 12:43:20 +0100 Subject: [PATCH 023/109] Add unknown value test to IndexSettingsTests for GPU --- .../org/elasticsearch/index/IndexSettingsTests.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java index 8c81763e195cc..254ce0335418e 100644 --- a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java +++ b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java @@ -942,5 +942,16 @@ public void testVectorsUseGpuSetting() { settings.updateIndexMetadata(newIndexMeta("index", Settings.EMPTY)); assertEquals(IndexSettings.GpuMode.AUTO, 
settings.useGpuForVectorsIndexing()); + + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> settings.updateIndexMetadata( + newIndexMeta("index", Settings.builder().put("index.vectors.indexing.use_gpu", "unknown").build()) + ) + ); + assertThat( + e.getMessage(), + Matchers.containsString("illegal value can't update [index.vectors.indexing.use_gpu] from [AUTO] to [unknown]") + ); } } From 7a2cd83a450de029b40c578c7204de4ab244ad6e Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Tue, 29 Jul 2025 10:16:42 +0100 Subject: [PATCH 024/109] Add GPU Integration test --- .../elasticsearch/plugin/gpu/GPUIndexIT.java | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java new file mode 100644 index 0000000000000..77037bb1a6058 --- /dev/null +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java @@ -0,0 +1,64 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.plugin.gpu; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.xpack.gpu.GPUPlugin; + +import java.util.Collection; +import java.util.List; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; + +public class GPUIndexIT extends ESIntegTestCase { + + @Override + protected Collection> nodePlugins() { + return List.of(GPUPlugin.class); + } + + public void testBasic() { + var settings = Settings.builder().put(indexSettings()); + settings.put("index.number_of_shards", 1); + settings.put("index.vectors.indexing.use_gpu", "true"); + assertAcked(prepareCreate("foo-index").setSettings(settings.build()).setMapping(""" + "properties": { + "my_vector": { + "type": "dense_vector", + "dims": 5, + "similarity": "l2_norm", + "index_options": { + "type": "hnsw" + } + } + } + """)); + ensureGreen(); + + prepareIndex("foo-index").setId("1").setSource("my_vector", new float[] { 230.0f, 300.33f, -34.8988f, 15.555f, -200.0f }).get(); + + // TODO: add more docs... 
+ + ensureGreen(); + refresh(); + + // TODO: do some basic search + // var knn = new KnnSearchBuilder("nested.vector", new float[] { -0.5f, 90.0f, -10f, 14.8f, -156.0f }, 2, 3, null, null); + // var request = prepareSearch("test").addFetchField("name").setKnnSearch(List.of(knn)); + // assertNoFailuresAndResponse(request, response -> { + // assertHitCount(response, 2); + // assertEquals("2", response.getHits().getHits()[0].getId()); + // assertEquals("cat", response.getHits().getHits()[0].field("name").getValue()); + // assertEquals("3", response.getHits().getHits()[1].getId()); + // assertEquals("rat", response.getHits().getHits()[1].field("name").getValue()); + // }); + // } + } +} From 1f742f4cacdd94e5da445fd9295e973ed6514b0e Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 30 Jul 2025 15:38:33 -0400 Subject: [PATCH 025/109] Add more tests --- .../elasticsearch/index/IndexSettings.java | 1 - .../elasticsearch/plugin/gpu/GPUIndexIT.java | 101 +++++++++++++----- .../plugin/gpu/src/main/java/module-info.java | 1 + .../elasticsearch/xpack/gpu/GPUFeatures.java | 28 +++++ .../elasticsearch/xpack/gpu/GPUPlugin.java | 20 +++- ...lasticsearch.features.FeatureSpecification | 8 ++ .../rest-api-spec/test/gpu/10_basic.yml | 60 ++++++++++- 7 files changed, 187 insertions(+), 32 deletions(-) create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUFeatures.java create mode 100644 x-pack/plugin/gpu/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index c0628832f0439..68695b2f7dc8b 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -1182,7 +1182,6 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti } 
scopedSettings.addSettingsUpdateConsumer(VECTORS_INDEXING_USE_GPU_SETTING, this::setUseGpuForVectorsIndexing); - scopedSettings.addSettingsUpdateConsumer( MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING, mergePolicyConfig::setCompoundFormatThreshold diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java index 77037bb1a6058..53e83dac30d4e 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java @@ -7,16 +7,23 @@ package org.elasticsearch.plugin.gpu; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.vectors.KnnSearchBuilder; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.xpack.gpu.GPUPlugin; import java.util.Collection; import java.util.List; +import java.util.Locale; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; +@LuceneTestCase.SuppressCodecs("*") // use our custom codec public class GPUIndexIT extends ESIntegTestCase { @Override @@ -25,40 +32,82 @@ protected Collection> nodePlugins() { } public void testBasic() { + final int dims = randomIntBetween(4, 128); + final int[] numDocs = new int[] { randomIntBetween(1, 100), 1, 2, randomIntBetween(1, 100) }; + createIndex(dims); + int totalDocs = 0; + for (int i = 0; i < numDocs.length; i++) { + indexDocs(numDocs[i], dims, i * 100); + totalDocs += numDocs[i]; + } + refresh(); + assertSearch(randomFloatVector(dims), totalDocs); + } + + public 
void testSearchWithoutGPU() { + final int dims = randomIntBetween(4, 128); + final int numDocs = randomIntBetween(1, 500); + createIndex(dims); + ensureGreen(); + + indexDocs(numDocs, dims, 0); + refresh(); + + // update settings to disable GPU usage + Settings.Builder settingsBuilder = Settings.builder().put("index.vectors.indexing.use_gpu", false); + assertAcked(client().admin().indices().prepareUpdateSettings("foo-index").setSettings(settingsBuilder.build())); + ensureGreen(); + assertSearch(randomFloatVector(dims), numDocs); + } + + private void createIndex(int dims) { var settings = Settings.builder().put(indexSettings()); settings.put("index.number_of_shards", 1); - settings.put("index.vectors.indexing.use_gpu", "true"); - assertAcked(prepareCreate("foo-index").setSettings(settings.build()).setMapping(""" - "properties": { - "my_vector": { - "type": "dense_vector", - "dims": 5, - "similarity": "l2_norm", - "index_options": { - "type": "hnsw" + settings.put("index.vectors.indexing.use_gpu", true); + assertAcked(prepareCreate("foo-index").setSettings(settings.build()).setMapping(String.format(Locale.ROOT, """ + { + "properties": { + "my_vector": { + "type": "dense_vector", + "dims": %d, + "similarity": "l2_norm", + "index_options": { + "type": "hnsw" + } } } } - """)); + """, dims))); ensureGreen(); + } - prepareIndex("foo-index").setId("1").setSource("my_vector", new float[] { 230.0f, 300.33f, -34.8988f, 15.555f, -200.0f }).get(); - - // TODO: add more docs... 
+ private void indexDocs(int numDocs, int dims, int startDoc) { + BulkRequestBuilder bulkRequest = client().prepareBulk(); + for (int i = 0; i < numDocs; i++) { + String id = String.valueOf(startDoc + i); + bulkRequest.add(prepareIndex("foo-index").setId(id).setSource("my_vector", randomFloatVector(dims))); + } + BulkResponse bulkResponse = bulkRequest.get(); + assertFalse("Bulk request failed: " + bulkResponse.buildFailureMessage(), bulkResponse.hasFailures()); + } - ensureGreen(); - refresh(); + private void assertSearch(float[] queryVector, int totalDocs) { + int k = Math.min(randomIntBetween(1, 20), totalDocs); + int numCandidates = k * 10; + assertNoFailuresAndResponse( + prepareSearch("foo-index").setSize(k) + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null))), + response -> { + assertEquals("Expected k hits to be returned", k, response.getHits().getHits().length); + } + ); + } - // TODO: do some basic search - // var knn = new KnnSearchBuilder("nested.vector", new float[] { -0.5f, 90.0f, -10f, 14.8f, -156.0f }, 2, 3, null, null); - // var request = prepareSearch("test").addFetchField("name").setKnnSearch(List.of(knn)); - // assertNoFailuresAndResponse(request, response -> { - // assertHitCount(response, 2); - // assertEquals("2", response.getHits().getHits()[0].getId()); - // assertEquals("cat", response.getHits().getHits()[0].field("name").getValue()); - // assertEquals("3", response.getHits().getHits()[1].getId()); - // assertEquals("rat", response.getHits().getHits()[1].field("name").getValue()); - // }); - // } + private static float[] randomFloatVector(int dims) { + float[] vector = new float[dims]; + for (int i = 0; i < dims; i++) { + vector[i] = randomFloat(); + } + return vector; } } diff --git a/x-pack/plugin/gpu/src/main/java/module-info.java b/x-pack/plugin/gpu/src/main/java/module-info.java index 5cfb1f3669798..d08bf6b101a4c 100644 --- a/x-pack/plugin/gpu/src/main/java/module-info.java +++ 
b/x-pack/plugin/gpu/src/main/java/module-info.java @@ -17,4 +17,5 @@ exports org.elasticsearch.xpack.gpu.codec; provides org.apache.lucene.codecs.KnnVectorsFormat with org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; + provides org.elasticsearch.features.FeatureSpecification with org.elasticsearch.xpack.gpu.GPUFeatures; } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUFeatures.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUFeatures.java new file mode 100644 index 0000000000000..cf9ed7b7e5a46 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUFeatures.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu; + +import org.elasticsearch.features.FeatureSpecification; +import org.elasticsearch.features.NodeFeature; + +import java.util.Set; + +public class GPUFeatures implements FeatureSpecification { + + public static final NodeFeature VECTORS_INDEXING_USE_GPU = new NodeFeature("vectors.indexing.use_gpu"); + + @Override + public Set getFeatures() { + return Set.of(); + } + + @Override + public Set getTestFeatures() { + return Set.of(VECTORS_INDEXING_USE_GPU); + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index bc0a49d1429ca..8f093ac553112 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import 
org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; @@ -25,20 +26,33 @@ public VectorsFormatProvider getVectorsFormatProvider() { if (GPU_FORMAT.isEnabled()) { IndexSettings.GpuMode gpuMode = indexSettings.getValue(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING); if (gpuMode == IndexSettings.GpuMode.TRUE) { + if (vectorIndexTypeSupported(indexOptions.getType()) == false) { + throw new IllegalArgumentException( + "[index.vectors.indexing.use_gpu] was set to [true], but GPU vector indexing is only supported " + + "for [hnsw] index_options.type, got: [" + + indexOptions.getType() + + "]" + ); + } CuVSResources resources = GPUVectorsFormat.cuVSResourcesOrNull(true); if (resources == null) { throw new IllegalArgumentException( "[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node." ); - } else { - return new GPUVectorsFormat(); } + return new GPUVectorsFormat(); } - if ((gpuMode == IndexSettings.GpuMode.AUTO) && GPUVectorsFormat.cuVSResourcesOrNull(false) != null) { + if ((gpuMode == IndexSettings.GpuMode.AUTO) + && vectorIndexTypeSupported(indexOptions.getType()) + && GPUVectorsFormat.cuVSResourcesOrNull(false) != null) { return new GPUVectorsFormat(); } } return null; }; } + + private boolean vectorIndexTypeSupported(DenseVectorFieldMapper.VectorIndexType type) { + return type == DenseVectorFieldMapper.VectorIndexType.HNSW; + } } diff --git a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification new file mode 100644 index 0000000000000..63e111db1dd79 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification @@ -0,0 +1,8 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0; you may not use this file except in compliance with the Elastic License +# 2.0. +# + +org.elasticsearch.xpack.gpu.GPUFeatures diff --git a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_basic.yml b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_basic.yml index 582495528b906..02eea5b756938 100644 --- a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_basic.yml +++ b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_basic.yml @@ -1,5 +1,11 @@ --- "Test GPU vector operations": + + - requires: + cluster_features: [ "vectors.indexing.use_gpu" ] + reason: "A cluster should have a GPU plugin to run these tests" + + # creating an index is successful even if the GPU is not available - do: indices.create: index: my_vectors @@ -12,6 +18,11 @@ similarity: l2_norm index_options: type: hnsw + settings: + index.number_of_shards: 1 + index.vectors.indexing.use_gpu: true + - match: { error: null } + - do: bulk: @@ -32,6 +43,29 @@ embedding: [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] - match: { errors: false } + - do: + bulk: + index: my_vectors + refresh: true + body: + - index: + _id: "4" + - text: "Fourth document" + embedding: [0.05, 0.12, 0.18, 0.22, 0.29, 0.33, 0.41, 0.47, 0.53, 0.59, 0.65, 0.71, 0.77, 
0.83, 0.89, 0.95, 0.11, 0.17, 0.23, 0.29, 0.35, 0.41, 0.47, 0.53, 0.59, 0.65, 0.71, 0.77, 0.83, 0.89, 0.95, 0.01, 0.07, 0.13, 0.19, 0.25, 0.31, 0.37, 0.43, 0.49, 0.55, 0.61, 0.67, 0.73, 0.79, 0.85, 0.91, 0.97, 0.03, 0.09, 0.15, 0.21, 0.27, 0.33, 0.39, 0.45, 0.51, 0.57, 0.63, 0.69, 0.75, 0.81, 0.87, 0.93, 0.99, 0.05, 0.11, 0.17, 0.23, 0.29, 0.35, 0.41, 0.47, 0.53, 0.59, 0.65, 0.71, 0.77, 0.83, 0.89, 0.95, 0.01, 0.07, 0.13, 0.19, 0.25, 0.31, 0.37, 0.43, 0.49, 0.55, 0.61, 0.67, 0.73, 0.79, 0.85, 0.91, 0.97, 0.03, 0.09, 0.15, 0.21, 0.27, 0.33, 0.39, 0.45, 0.51, 0.57, 0.63, 0.69, 0.75, 0.81, 0.87, 0.93, 0.99, 0.05, 0.11, 0.17, 0.23, 0.29, 0.35, 0.41, 0.47, 0.53, 0.59, 0.65, 0.71, 0.46] + - index: + _id: "5" + - text: "Fifth document" + embedding: [0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.99, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.99, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39] + - index: + _id: "6" + - text: "Sixth document" + embedding: [0.12, 0.22, 0.32, 0.42, 0.52, 0.62, 0.72, 0.82, 0.92, 0.14, 0.24, 0.34, 0.44, 0.54, 0.64, 0.74, 0.84, 0.94, 0.16, 0.26, 0.36, 0.46, 0.56, 0.66, 0.76, 0.86, 0.96, 0.18, 0.28, 0.38, 0.48, 0.58, 0.68, 0.78, 0.88, 0.98, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 
0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.99, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.29, 0.39, 0.49] + - index: + _id: "7" + - text: "Seventh document" + embedding: [0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.07, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.09, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.99, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.99, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.90] + - match: { errors: false } + - do: search: index: my_vectors @@ -39,7 +73,29 @@ knn: field: embedding query_vector: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] - k: 3 + k: 10 - - match: { hits.total.value: 3 } + - 
match: { hits.total.value: 7 } - match: { hits.hits.0._id: "1" } + + - do: + bulk: + index: my_vectors + refresh: true + body: + - delete: + _id: "1" + - delete: + _id: "7" + - match: { errors: false } + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] + k: 10 + - match: { hits.total.value: 5 } + - match: { hits.hits.0._id: "2" } From 9ad036725d709b1f53268a70b3400709b23f545b Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Sun, 3 Aug 2025 15:00:17 -0400 Subject: [PATCH 026/109] Adjust Cagra Index degree to fit the default performance of Elasticsearch --- .../elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java index cf47d15d83ea5..a90fdf866643f 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -263,6 +263,7 @@ private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Datase // TODO: expose cagra index params of intermediate graph degree, graph degree, algorithm, 
NNDescentNumIterations CagraIndexParams params = new CagraIndexParams.Builder().withNumWriterThreads(1) // TODO: how many CPU threads we can use? .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.NN_DESCENT) + .withGraphDegree(16) .withMetric(distanceType) .build(); From 446fba7c9d46f7ab7912f15cf6d3a5e05818eee8 Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Tue, 12 Aug 2025 10:16:19 +0100 Subject: [PATCH 027/109] Add CuVSResourceManager for controlling parallelism and pooling of cuVS resources (#132670) --- .../elasticsearch/plugin/gpu/GPUIndexIT.java | 3 + .../elasticsearch/xpack/gpu/GPUPlugin.java | 9 +- .../elasticsearch/xpack/gpu/GPUSupport.java | 54 +++++++ .../xpack/gpu/codec/CuVSResourceManager.java | 153 ++++++++++++++++++ .../gpu/codec/GPUToHNSWVectorsWriter.java | 86 +++++----- .../xpack/gpu/codec/GPUVectorsFormat.java | 41 +---- .../gpu/codec/CuVSResourceManagerTests.java | 121 ++++++++++++++ .../xpack/gpu/codec/DatasetUtilsTests.java | 5 +- .../codec/GPUDenseVectorFieldMapperTests.java | 3 +- .../gpu/codec/GPUVectorsFormatTests.java | 3 +- 10 files changed, 398 insertions(+), 80 deletions(-) create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java create mode 100644 x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java index 53e83dac30d4e..f35e1f1d6b659 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java @@ -15,6 +15,7 @@ import 
org.elasticsearch.search.vectors.KnnSearchBuilder; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.xpack.gpu.GPUPlugin; +import org.elasticsearch.xpack.gpu.GPUSupport; import java.util.Collection; import java.util.List; @@ -32,6 +33,7 @@ protected Collection> nodePlugins() { } public void testBasic() { + assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); final int dims = randomIntBetween(4, 128); final int[] numDocs = new int[] { randomIntBetween(1, 100), 1, 2, randomIntBetween(1, 100) }; createIndex(dims); @@ -45,6 +47,7 @@ public void testBasic() { } public void testSearchWithoutGPU() { + assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); final int dims = randomIntBetween(4, 128); final int numDocs = randomIntBetween(1, 500); createIndex(dims); diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index 8f093ac553112..eb7d3b4f594d2 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -6,8 +6,6 @@ */ package org.elasticsearch.xpack.gpu; -import com.nvidia.cuvs.CuVSResources; - import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; @@ -34,17 +32,16 @@ public VectorsFormatProvider getVectorsFormatProvider() { + "]" ); } - CuVSResources resources = GPUVectorsFormat.cuVSResourcesOrNull(true); - if (resources == null) { + if (GPUSupport.isSupported(true) == false) { throw new IllegalArgumentException( "[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node." 
); } return new GPUVectorsFormat(); } - if ((gpuMode == IndexSettings.GpuMode.AUTO) + if (gpuMode == IndexSettings.GpuMode.AUTO && vectorIndexTypeSupported(indexOptions.getType()) - && GPUVectorsFormat.cuVSResourcesOrNull(false) != null) { + && GPUSupport.isSupported(false)) { return new GPUVectorsFormat(); } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java new file mode 100644 index 0000000000000..67fd97faec259 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java @@ -0,0 +1,54 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu; + +import com.nvidia.cuvs.CuVSResources; + +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; + +public class GPUSupport { + + private static final Logger LOG = LogManager.getLogger(GPUSupport.class); + + /** Tells whether the platform supports cuvs. */ + public static boolean isSupported(boolean logError) { + try (var resources = cuVSResourcesOrNull(logError)) { + if (resources != null) { + return true; + } + } + return false; + } + + /** Returns a resources if supported, otherwise null. 
*/ + public static CuVSResources cuVSResourcesOrNull(boolean logError) { + try { + var resources = CuVSResources.create(); + return resources; + } catch (UnsupportedOperationException uoe) { + if (logError) { + String msg = ""; + if (uoe.getMessage() == null) { + msg = "Runtime Java version: " + Runtime.version().feature(); + } else { + msg = ": " + uoe.getMessage(); + } + LOG.warn("GPU based vector indexing is not supported on this platform or java version; " + msg); + } + } catch (Throwable t) { + if (logError) { + if (t instanceof ExceptionInInitializerError ex) { + t = ex.getCause(); + } + LOG.warn("Exception occurred during creation of cuvs resources. " + t); + } + } + return null; + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java new file mode 100644 index 0000000000000..e7977f28c9c22 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java @@ -0,0 +1,153 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CuVSResources; + +import org.elasticsearch.xpack.gpu.GPUSupport; + +import java.nio.file.Path; +import java.util.Objects; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; + +/** + * A manager of {@link com.nvidia.cuvs.CuVSResources}. There is one manager per GPU. + * + *

<p>All access to GPU resources is mediated through a manager. A manager helps coordinate usage threads to: + * <ul> + * <li>ensure single-threaded access to any particular resource at a time</li> + * <li>Control the total number of concurrent operations that may be performed on a GPU</li> + * <li>Pool resources, to avoid frequent creation and destruction, which are expensive operations.</li> + * </ul> + * + * <p>
Fundamentally, a resource is used in compute and memory bound operations. The former occurs prior to the latter, e.g. + * index build (compute), followed by a copy/process of the newly built index (memory). The manager allows the resource + * user to indicate that compute is complete before releasing the resources. This can help improve parallelism of compute + * on the GPU - allowing the next compute operation to proceed before releasing the resources. + * + */ +public interface CuVSResourceManager { + + /** + * Acquires a resource from the manager. + * + *

A manager can use the given parameters, numVectors and dims, to estimate the potential + * effect on GPU memory and compute usage to determine whether to give out + * another resource or wait for a resources to be returned before giving out another. + */ + // numVectors and dims are currently unused, but could be used along with GPU metadata, + // memory, generation, etc, when acquiring for 10M x 1536 dims, or 100,000 x 128 dims, + // to give out a resources or not. + ManagedCuVSResources acquire(int numVectors, int dims) throws InterruptedException; + + /** Marks the resources as finished with regard to compute. */ + void finishedComputation(ManagedCuVSResources resources); + + /** Returns the given resource to the manager. */ + void release(ManagedCuVSResources resources); + + /** Shuts down the manager, releasing all open resources. */ + void shutdown(); + + /** Returns the system-wide pooling manager. */ + static CuVSResourceManager pooling() { + return PoolingCuVSResourceManager.INSTANCE; + } + + /** + * A manager that maintains a pool of resources. 
+ */ + class PoolingCuVSResourceManager implements CuVSResourceManager { + + static final int MAX_RESOURCES = 2; + static final PoolingCuVSResourceManager INSTANCE = new PoolingCuVSResourceManager(MAX_RESOURCES); + + final BlockingQueue pool; + final int capacity; + int createdCount; + + public PoolingCuVSResourceManager(int capacity) { + if (capacity < 1 || capacity > MAX_RESOURCES) { + throw new IllegalArgumentException("Resource count must be between 1 and " + MAX_RESOURCES); + } + this.capacity = capacity; + this.pool = new ArrayBlockingQueue<>(capacity); + } + + @Override + public ManagedCuVSResources acquire(int numVectors, int dims) throws InterruptedException { + ManagedCuVSResources res = pool.poll(); + if (res != null) { + return res; + } + synchronized (this) { + if (createdCount < capacity) { + createdCount++; + return new ManagedCuVSResources(Objects.requireNonNull(createNew())); + } + } + // Otherwise, wait for one to be released + return pool.take(); + } + + // visible for testing + protected CuVSResources createNew() { + return GPUSupport.cuVSResourcesOrNull(true); + } + + @Override + public void finishedComputation(ManagedCuVSResources resources) { + // currently does nothing, but could allow acquire to return possibly blocked resources + } + + @Override + public void release(ManagedCuVSResources resources) { + var added = pool.offer(Objects.requireNonNull(resources)); + assert added : "Failed to release resource back to pool"; + } + + @Override + public void shutdown() { + for (ManagedCuVSResources res : pool) { + res.delegate.close(); + } + pool.clear(); + } + } + + /** A managed resource. Cannot be closed. 
*/ + final class ManagedCuVSResources implements CuVSResources { + + final CuVSResources delegate; + + ManagedCuVSResources(CuVSResources resources) { + this.delegate = resources; + } + + @Override + public ScopedAccess access() { + return delegate.access(); + } + + @Override + public void close() { + throw new UnsupportedOperationException("this resource is managed, cannot be closed by clients"); + } + + @Override + public Path tempDirectory() { + return null; + } + + @Override + public String toString() { + return "ManagedCuVSResources[delegate=" + delegate + "]"; + } + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java index a90fdf866643f..163e2277137ed 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -9,7 +9,6 @@ import com.nvidia.cuvs.CagraIndex; import com.nvidia.cuvs.CagraIndexParams; -import com.nvidia.cuvs.CuVSResources; import com.nvidia.cuvs.Dataset; import org.apache.lucene.codecs.CodecUtil; @@ -68,7 +67,7 @@ final class GPUToHNSWVectorsWriter extends KnnVectorsWriter { private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(GPUToHNSWVectorsWriter.class); private static final int LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT = 16; - private final CuVSResources cuVSResources; + private final CuVSResourceManager cuVSResourceManager; private final SegmentWriteState segmentWriteState; private final IndexOutput meta, vectorIndex; private final int M; @@ -78,10 +77,15 @@ final class GPUToHNSWVectorsWriter extends KnnVectorsWriter { private final List fields = new ArrayList<>(); private boolean finished; - GPUToHNSWVectorsWriter(CuVSResources cuVSResources, SegmentWriteState state, int M, int beamWidth, 
FlatVectorsWriter flatVectorWriter) - throws IOException { - assert cuVSResources != null : "CuVSResources must not be null"; - this.cuVSResources = cuVSResources; + GPUToHNSWVectorsWriter( + CuVSResourceManager cuVSResourceManager, + SegmentWriteState state, + int M, + int beamWidth, + FlatVectorsWriter flatVectorWriter + ) throws IOException { + assert cuVSResourceManager != null : "CuVSResources must not be null"; + this.cuVSResourceManager = cuVSResourceManager; this.M = M; this.flatVectorWriter = flatVectorWriter; this.beamWidth = beamWidth; @@ -267,42 +271,52 @@ private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Datase .withMetric(distanceType) .build(); - // build index on GPU - long startTime = System.nanoTime(); - var index = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params).build(); - if (logger.isDebugEnabled()) { - logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, dataset.size()); - } - - // TODO: do serialization through MemorySegment instead of a temp file - // serialize index for CPU consumption to the hnwslib format - startTime = System.nanoTime(); - IndexOutput tempCagraHNSW = null; - boolean success = false; + var cuVSResources = cuVSResourceManager.acquire(dataset.size(), dataset.dimensions()); try { - tempCagraHNSW = segmentWriteState.directory.createTempOutput( - vectorIndex.getName(), - "cagra_hnws_temp", - segmentWriteState.context - ); - var tempCagraHNSWOutputStream = new IndexOutputOutputStream(tempCagraHNSW); - index.serializeToHNSW(tempCagraHNSWOutputStream); + long startTime = System.nanoTime(); + var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params); + var index = indexBuilder.build(); + cuVSResourceManager.finishedComputation(cuVSResources); if (logger.isDebugEnabled()) { - logger.debug("Carga index serialized to hnswlib format in: {} ms", (System.nanoTime() - startTime) / 
1_000_000.0); + logger.debug( + "Carga index created in: {} ms; #num vectors: {}", + (System.nanoTime() - startTime) / 1_000_000.0, + dataset.size() + ); } - success = true; - } finally { - index.destroyIndex(); - if (success) { - org.elasticsearch.core.IOUtils.close(tempCagraHNSW); - } else { - if (tempCagraHNSW != null) { - IOUtils.closeWhileHandlingException(tempCagraHNSW); - org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName()); + + // TODO: do serialization through MemorySegment instead of a temp file + // serialize index for CPU consumption to the hnwslib format + startTime = System.nanoTime(); + IndexOutput tempCagraHNSW = null; + boolean success = false; + try { + tempCagraHNSW = segmentWriteState.directory.createTempOutput( + vectorIndex.getName(), + "cagra_hnws_temp", + segmentWriteState.context + ); + var tempCagraHNSWOutputStream = new IndexOutputOutputStream(tempCagraHNSW); + index.serializeToHNSW(tempCagraHNSWOutputStream); + if (logger.isDebugEnabled()) { + logger.debug("Carga index serialized to hnswlib format in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); + } + success = true; + } finally { + index.destroyIndex(); + if (success) { + org.elasticsearch.core.IOUtils.close(tempCagraHNSW); + } else { + if (tempCagraHNSW != null) { + IOUtils.closeWhileHandlingException(tempCagraHNSW); + org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName()); + } } } + return tempCagraHNSW.getName(); + } finally { + cuVSResourceManager.release(cuVSResources); } - return tempCagraHNSW.getName(); } @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java index c87b5538e2a8b..8620795ddff41 100644 --- 
a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.gpu.codec; -import com.nvidia.cuvs.CuVSResources; - import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; @@ -48,18 +46,21 @@ public class GPUVectorsFormat extends KnnVectorsFormat { FlatVectorScorerUtil.getLucene99FlatVectorsScorer() ); + final CuVSResourceManager cuVSResourceManager; + public GPUVectorsFormat() { + this(CuVSResourceManager.pooling()); + } + + public GPUVectorsFormat(CuVSResourceManager cuVSResourceManager) { super(NAME); + this.cuVSResourceManager = cuVSResourceManager; } @Override public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - CuVSResources cuVSResources = cuVSResourcesOrNull(true); - if (cuVSResources == null) { - throw new IllegalArgumentException("GPU based vector indexing is not supported on this platform or java version"); - } return new GPUToHNSWVectorsWriter( - cuVSResources, + cuVSResourceManager, state, DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, @@ -81,30 +82,4 @@ public int getMaxDimensions(String fieldName) { public String toString() { return NAME + "()"; } - - /** Tells whether the platform supports cuvs. 
*/ - public static CuVSResources cuVSResourcesOrNull(boolean logError) { - try { - var resources = CuVSResources.create(); - return resources; - } catch (UnsupportedOperationException uoe) { - if (logError) { - String msg = ""; - if (uoe.getMessage() == null) { - msg = "Runtime Java version: " + Runtime.version().feature(); - } else { - msg = ": " + uoe.getMessage(); - } - LOG.warn("GPU based vector indexing is not supported on this platform or java version; " + msg); - } - } catch (Throwable t) { - if (logError) { - if (t instanceof ExceptionInInitializerError ex) { - t = ex.getCause(); - } - LOG.warn("Exception occurred during creation of cuvs resources. " + t); - } - } - return null; - } } diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java new file mode 100644 index 0000000000000..a5bac96cc3b51 --- /dev/null +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java @@ -0,0 +1,121 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CuVSResources; + +import org.elasticsearch.test.ESTestCase; + +import java.nio.file.Path; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import static org.hamcrest.Matchers.anyOf; +import static org.hamcrest.Matchers.containsString; + +public class CuVSResourceManagerTests extends ESTestCase { + + public void testBasic() throws InterruptedException { + var mgr = new MockPoolingCuVSResourceManager(2); + var res1 = mgr.acquire(0, 0); + var res2 = mgr.acquire(0, 0); + assertThat(res1.toString(), containsString("id=0")); + assertThat(res2.toString(), containsString("id=1")); + mgr.release(res1); + mgr.release(res2); + res1 = mgr.acquire(0, 0); + res2 = mgr.acquire(0, 0); + assertThat(res1.toString(), containsString("id=0")); + assertThat(res2.toString(), containsString("id=1")); + mgr.release(res1); + mgr.release(res2); + mgr.shutdown(); + } + + public void testBlocking() throws Exception { + var mgr = new MockPoolingCuVSResourceManager(2); + var res1 = mgr.acquire(0, 0); + var res2 = mgr.acquire(0, 0); + + AtomicReference holder = new AtomicReference<>(); + Thread t = new Thread(() -> { + try { + var res3 = mgr.acquire(0, 0); + holder.set(res3); + } catch (InterruptedException e) { + throw new AssertionError(e); + } + }); + t.start(); + Thread.sleep(1_000); + assertNull(holder.get()); + mgr.release(randomFrom(res1, res2)); + t.join(); + assertThat(holder.get().toString(), anyOf(containsString("id=0"), containsString("id=1"))); + mgr.shutdown(); + } + + public void testManagedResIsNotClosable() throws Exception { + var mgr = new MockPoolingCuVSResourceManager(1); + var res = mgr.acquire(0, 0); + assertThrows(UnsupportedOperationException.class, () -> res.close()); + mgr.release(res); + mgr.shutdown(); + } + + public void testDoubleRelease() throws InterruptedException { + var mgr = new MockPoolingCuVSResourceManager(2); + var res1 = 
mgr.acquire(0, 0); + var res2 = mgr.acquire(0, 0); + mgr.release(res1); + mgr.release(res2); + assertThrows(AssertionError.class, () -> mgr.release(randomFrom(res1, res2))); + mgr.shutdown(); + } + + static class MockPoolingCuVSResourceManager extends CuVSResourceManager.PoolingCuVSResourceManager { + + final AtomicInteger idGenerator = new AtomicInteger(); + + MockPoolingCuVSResourceManager(int capacity) { + super(capacity); + } + + @Override + protected CuVSResources createNew() { + return new MockCuVSResources(idGenerator.getAndIncrement()); + } + } + + static class MockCuVSResources implements CuVSResources { + + final int id; + + MockCuVSResources(int id) { + this.id = id; + } + + @Override + public ScopedAccess access() { + throw new UnsupportedOperationException(); + } + + @Override + public void close() {} + + @Override + public Path tempDirectory() { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return "MockCuVSResources[id=" + id + "]"; + } + } +} diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java index e86270d73695b..bfc4ee6d48d0d 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java @@ -12,6 +12,7 @@ import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.store.MemorySegmentAccessInput; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.gpu.GPUSupport; import org.junit.Before; import java.lang.foreign.MemorySegment; @@ -27,9 +28,7 @@ public class DatasetUtilsTests extends ESTestCase { @Before public void setup() { // TODO: abstract out setup in to common GPUTestcase assumeTrue("cuvs runtime only supported on 22 or greater, your JDK is " + Runtime.version(), Runtime.version().feature() >= 
22); - try (var resources = GPUVectorsFormat.cuVSResourcesOrNull(false)) { - assumeTrue("cuvs not supported", resources != null); - } + assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); datasetUtils = DatasetUtils.getInstance(); } diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java index fd8db09461e39..1a1ecf9f85ec4 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xpack.gpu.GPUPlugin; +import org.elasticsearch.xpack.gpu.GPUSupport; import org.junit.Before; import java.io.IOException; @@ -29,7 +30,7 @@ public class GPUDenseVectorFieldMapperTests extends AbstractDenseVectorFieldMapp @Before public void setup() { - assumeTrue("cuvs not supported", GPUVectorsFormat.cuVSResourcesOrNull(false) != null); + assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); } @Override diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java index 99a823277506b..b1fcc1b01b58e 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java @@ -14,6 +14,7 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.xpack.gpu.GPUSupport; import org.junit.BeforeClass; public class 
GPUVectorsFormatTests extends BaseKnnVectorsFormatTestCase { @@ -25,7 +26,7 @@ public class GPUVectorsFormatTests extends BaseKnnVectorsFormatTestCase { @BeforeClass public static void beforeClass() { - assumeTrue("cuvs not supported", GPUVectorsFormat.cuVSResourcesOrNull(false) != null); + assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); } static final Codec codec = TestUtil.alwaysKnnVectorsFormat(new GPUVectorsFormat()); From e6e9c17d1b8875ec406c0731c80bcb726b8b3deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?= Date: Thu, 14 Aug 2025 17:33:03 +0200 Subject: [PATCH 028/109] Use new cuvs version 25.10; direct access to cagra graph via CuVSMatrix (#132832) This PR updates cuvs-java dependency to 25.10 (I left 25.08 and updated its verification metadata to the final version for convenience in case we want to go back). It uses CuVSMatrix as a way to transfer data efficiently from GPU memory to the Java heap directly (and then to a Lucene file). I tried to keep changes at a minimum, but some restructuring was necessary (e.g. resource management needs to be done at an upper level - we need to keep hold of the resource until we have finished reading the CuVSMatrix).
--- gradle/verification-metadata.xml | 6 +- x-pack/plugin/gpu/build.gradle | 6 +- .../elasticsearch/xpack/gpu/GPUSupport.java | 2 +- .../xpack/gpu/codec/DatasetUtils.java | 5 +- .../xpack/gpu/codec/DatasetUtilsImpl.java | 6 +- .../gpu/codec/GPUToHNSWVectorsWriter.java | 191 ++++++------------ .../xpack/gpu/codec/DatasetUtilsImpl.java | 12 +- .../xpack/gpu/codec/DatasetUtilsTests.java | 2 +- .../xpack/gpu/GPUClientYamlTestSuiteIT.java | 1 + 9 files changed, 78 insertions(+), 153 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 689560bb17d38..e2c2f016aa34f 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -15,6 +15,10 @@ + + + + @@ -1130,7 +1134,7 @@ - + diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle index 1ab800cfac4e0..0c4ce21d0fad0 100644 --- a/x-pack/plugin/gpu/build.gradle +++ b/x-pack/plugin/gpu/build.gradle @@ -20,14 +20,12 @@ repositories { dependencies { compileOnly project(path: xpackModule('core')) compileOnly project(':server') - implementation 'com.nvidia.cuvs:cuvs-java:25.08.0' + implementation 'com.nvidia.cuvs:cuvs-java:25.10.0' testImplementation(testArtifact(project(xpackModule('core')))) testImplementation(testArtifact(project(':server'))) clusterModules project(xpackModule('gpu')) } -tasks.named("yamlRestTest") { - usesDefaultDistribution("uses gpu plugin") -} + artifacts { restXpackTests(new File(projectDir, "src/yamlRestTest/resources/rest-api-spec/test")) } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java index 67fd97faec259..f1ff6bcffd1d2 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java @@ -46,7 +46,7 @@ public static CuVSResources cuVSResourcesOrNull(boolean logError) { if (t instanceof 
ExceptionInInitializerError ex) { t = ex.getCause(); } - LOG.warn("Exception occurred during creation of cuvs resources. " + t); + LOG.warn("Exception occurred during creation of cuvs resources", t); } } return null; diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java index 0f90ab4c5bb75..bdc598e876931 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java @@ -7,7 +7,7 @@ package org.elasticsearch.xpack.gpu.codec; -import com.nvidia.cuvs.Dataset; +import com.nvidia.cuvs.CuVSMatrix; import org.apache.lucene.store.MemorySegmentAccessInput; @@ -20,6 +20,5 @@ static DatasetUtils getInstance() { } /** Returns a Dataset over the float32 vectors in the input. */ - Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException; - + CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException; } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java index 3d6d33028ab8f..0cca7ce5cd7cb 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java @@ -7,12 +7,10 @@ package org.elasticsearch.xpack.gpu.codec; -import com.nvidia.cuvs.Dataset; +import com.nvidia.cuvs.CuVSMatrix; import org.apache.lucene.store.MemorySegmentAccessInput; -import java.io.IOException; - /** Stubb holder - never executed. 
*/ public class DatasetUtilsImpl implements DatasetUtils { @@ -21,7 +19,7 @@ static DatasetUtils getInstance() { } @Override - public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException { + public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) { throw new UnsupportedOperationException("should not reach here"); } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java index 163e2277137ed..73e11dd24f4d5 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -9,7 +9,7 @@ import com.nvidia.cuvs.CagraIndex; import com.nvidia.cuvs.CagraIndexParams; -import com.nvidia.cuvs.Dataset; +import com.nvidia.cuvs.CuVSMatrix; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnFieldVectorsWriter; @@ -35,7 +35,6 @@ import org.apache.lucene.util.hnsw.HnswGraph; import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator; import org.apache.lucene.util.packed.DirectMonotonicWriter; -import org.elasticsearch.common.lucene.store.IndexOutputOutputStream; import org.elasticsearch.core.IOUtils; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.logging.LogManager; @@ -177,21 +176,21 @@ public long ramBytesUsed() { } private static final class DatasetOrVectors { - private final Dataset dataset; + private final CuVSMatrix dataset; private final float[][] vectors; static DatasetOrVectors fromArray(float[][] vectors) { return new DatasetOrVectors( - vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : Dataset.ofArray(vectors), + vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : CuVSMatrix.ofArray(vectors), vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? 
vectors : null ); } - static DatasetOrVectors fromDataset(Dataset dataset) { + static DatasetOrVectors fromDataset(CuVSMatrix dataset) { return new DatasetOrVectors(dataset, null); } - private DatasetOrVectors(Dataset dataset, float[][] vectors) { + private DatasetOrVectors(CuVSMatrix dataset, float[][] vectors) { this.dataset = dataset; this.vectors = vectors; validateState(); @@ -204,10 +203,10 @@ private void validateState() { } int size() { - return dataset != null ? dataset.size() : vectors.length; + return dataset != null ? (int) dataset.size() : vectors.length; } - Dataset getDataset() { + CuVSMatrix getDataset() { return dataset; } @@ -243,9 +242,16 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV } mockGraph = writeGraph(vectors, graphLevelNodeOffsets); } else { - String tempCagraHNSWFileName = buildGPUIndex(fieldInfo.getVectorSimilarityFunction(), datasetOrVectors.dataset); - assert tempCagraHNSWFileName != null : "GPU index should be built for field: " + fieldInfo.name; - mockGraph = writeGraph(tempCagraHNSWFileName, graphLevelNodeOffsets); + var dataset = datasetOrVectors.dataset; + var cuVSResources = cuVSResourceManager.acquire((int) dataset.size(), (int) dataset.columns()); + try { + try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) { + assert index != null : "GPU index should be built for field: " + fieldInfo.name; + mockGraph = writeGraph(index.getGraph(), graphLevelNodeOffsets); + } + } finally { + cuVSResourceManager.release(cuVSResources); + } } long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetOrVectors.size(), mockGraph, graphLevelNodeOffsets); @@ -256,8 +262,11 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV } } - @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") - private String 
buildGPUIndex(VectorSimilarityFunction similarityFunction, Dataset dataset) throws Throwable { + private CagraIndex buildGPUIndex( + CuVSResourceManager.ManagedCuVSResources cuVSResources, + VectorSimilarityFunction similarityFunction, + CuVSMatrix dataset + ) throws Throwable { CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) { case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded; case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct; @@ -271,134 +280,50 @@ private String buildGPUIndex(VectorSimilarityFunction similarityFunction, Datase .withMetric(distanceType) .build(); - var cuVSResources = cuVSResourceManager.acquire(dataset.size(), dataset.dimensions()); - try { - long startTime = System.nanoTime(); - var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params); - var index = indexBuilder.build(); - cuVSResourceManager.finishedComputation(cuVSResources); - if (logger.isDebugEnabled()) { - logger.debug( - "Carga index created in: {} ms; #num vectors: {}", - (System.nanoTime() - startTime) / 1_000_000.0, - dataset.size() - ); - } - - // TODO: do serialization through MemorySegment instead of a temp file - // serialize index for CPU consumption to the hnwslib format - startTime = System.nanoTime(); - IndexOutput tempCagraHNSW = null; - boolean success = false; - try { - tempCagraHNSW = segmentWriteState.directory.createTempOutput( - vectorIndex.getName(), - "cagra_hnws_temp", - segmentWriteState.context - ); - var tempCagraHNSWOutputStream = new IndexOutputOutputStream(tempCagraHNSW); - index.serializeToHNSW(tempCagraHNSWOutputStream); - if (logger.isDebugEnabled()) { - logger.debug("Carga index serialized to hnswlib format in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); - } - success = true; - } finally { - index.destroyIndex(); - if (success) { - org.elasticsearch.core.IOUtils.close(tempCagraHNSW); - } else { - if (tempCagraHNSW != 
null) { - IOUtils.closeWhileHandlingException(tempCagraHNSW); - org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSW.getName()); - } - } - } - return tempCagraHNSW.getName(); - } finally { - cuVSResourceManager.release(cuVSResources); + long startTime = System.nanoTime(); + var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params); + var index = indexBuilder.build(); + cuVSResourceManager.finishedComputation(cuVSResources); + if (logger.isDebugEnabled()) { + logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, dataset.size()); } + return index; } - @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") - private HnswGraph writeGraph(String tempCagraHNSWFileName, int[][] levelNodeOffsets) throws IOException { + private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) throws IOException { long startTime = System.nanoTime(); - boolean success = false; - IndexInput tempCagraHNSWInput = null; - int maxElementCount; - int maxGraphDegree; - try { - tempCagraHNSWInput = segmentWriteState.directory.openInput(tempCagraHNSWFileName, segmentWriteState.context); - // read the metadata from the hnlswlib format; - // some of them are not used in the Lucene HNSW format - tempCagraHNSWInput.readLong(); // offSetLevel0 - maxElementCount = (int) tempCagraHNSWInput.readLong(); - tempCagraHNSWInput.readLong(); // currElementCount - tempCagraHNSWInput.readLong(); // sizeDataPerElement - long labelOffset = tempCagraHNSWInput.readLong(); - long dataOffset = tempCagraHNSWInput.readLong(); - int maxLevel = tempCagraHNSWInput.readInt(); - tempCagraHNSWInput.readInt(); // entryPointNode - tempCagraHNSWInput.readLong(); // maxM - long maxM0 = tempCagraHNSWInput.readLong(); // number of graph connections - tempCagraHNSWInput.readLong(); // M - tempCagraHNSWInput.readLong(); // 
mult - tempCagraHNSWInput.readLong(); // efConstruction - - assert (maxLevel == 1) : "Cagra index is flat, maxLevel must be: 1, got: " + maxLevel; - maxGraphDegree = (int) maxM0; - int[] neighbors = new int[maxGraphDegree]; - int dimension = (int) ((labelOffset - dataOffset) / Float.BYTES); - // assert (dimension == dimensionCalculated) - // : "Cagra index vector dimension must be: " + dimension + ", got: " + dimensionCalculated; - - levelNodeOffsets[0] = new int[maxElementCount]; - - // read graph from the cagra_hnswlib index and write it to the Lucene vectorIndex file - int[] scratch = new int[maxGraphDegree]; - for (int node = 0; node < maxElementCount; node++) { - // read from the cagra_hnswlib index - int nodeDegree = tempCagraHNSWInput.readInt(); - assert (nodeDegree == maxGraphDegree) - : "In Cagra graph all nodes must have the same number of connections : " + maxGraphDegree + ", got" + nodeDegree; - for (int i = 0; i < nodeDegree; i++) { - neighbors[i] = tempCagraHNSWInput.readInt(); - } - // Skip over the vector data - tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + dimension * Float.BYTES); - // Skip over the label/id - tempCagraHNSWInput.seek(tempCagraHNSWInput.getFilePointer() + Long.BYTES); - - // write to the Lucene vectorIndex file - long offsetStart = vectorIndex.getFilePointer(); - Arrays.sort(neighbors); - int actualSize = 0; - scratch[actualSize++] = neighbors[0]; - for (int i = 1; i < nodeDegree; i++) { - assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount; - if (neighbors[i - 1] == neighbors[i]) { - continue; - } - scratch[actualSize++] = neighbors[i] - neighbors[i - 1]; - } - // Write the size after duplicates are removed - vectorIndex.writeVInt(actualSize); - for (int i = 0; i < actualSize; i++) { - vectorIndex.writeVInt(scratch[i]); + int maxElementCount = (int) cagraGraph.size(); + int maxGraphDegree = (int) cagraGraph.columns(); + int[] neighbors = new int[maxGraphDegree]; + + 
levelNodeOffsets[0] = new int[maxElementCount]; + // write the cagra graph to the Lucene vectorIndex file + int[] scratch = new int[maxGraphDegree]; + for (int node = 0; node < maxElementCount; node++) { + cagraGraph.getRow(node).toArray(neighbors); + + // write to the Lucene vectorIndex file + long offsetStart = vectorIndex.getFilePointer(); + Arrays.sort(neighbors); + int actualSize = 0; + scratch[actualSize++] = neighbors[0]; + for (int i = 1; i < maxGraphDegree; i++) { + assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount; + if (neighbors[i - 1] == neighbors[i]) { + continue; } - levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); - } - if (logger.isDebugEnabled()) { - logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); + scratch[actualSize++] = neighbors[i] - neighbors[i - 1]; } - success = true; - } finally { - if (success) { - IOUtils.close(tempCagraHNSWInput); - } else { - IOUtils.closeWhileHandlingException(tempCagraHNSWInput); + // Write the size after duplicates are removed + vectorIndex.writeVInt(actualSize); + for (int i = 0; i < actualSize; i++) { + vectorIndex.writeVInt(scratch[i]); } - org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempCagraHNSWFileName); + levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); + } + if (logger.isDebugEnabled()) { + logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0); } return createMockGraph(maxElementCount, maxGraphDegree); } diff --git a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java index 998154e1ac303..01bcde9dcc0e0 100644 --- 
a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java +++ b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java @@ -7,7 +7,7 @@ package org.elasticsearch.xpack.gpu.codec; -import com.nvidia.cuvs.Dataset; +import com.nvidia.cuvs.CuVSMatrix; import com.nvidia.cuvs.spi.CuVSProvider; import org.apache.lucene.store.MemorySegmentAccessInput; @@ -20,15 +20,15 @@ public class DatasetUtilsImpl implements DatasetUtils { private static final DatasetUtils INSTANCE = new DatasetUtilsImpl(); - private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeDatasetBuilder(); + private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeMatrixBuilder(); static DatasetUtils getInstance() { return INSTANCE; } - static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dimensions) { + static CuVSMatrix fromMemorySegment(MemorySegment memorySegment, int size, int dimensions, CuVSMatrix.DataType dataType) { try { - return (Dataset) createDataset$mh.invokeExact(memorySegment, size, dimensions); + return (CuVSMatrix) createDataset$mh.invokeExact(memorySegment, size, dimensions, dataType); } catch (Throwable e) { if (e instanceof Error err) { throw err; @@ -43,7 +43,7 @@ static Dataset fromMemorySegment(MemorySegment memorySegment, int size, int dime private DatasetUtilsImpl() {} @Override - public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException { + public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException { if (numVectors < 0 || dims < 0) { throwIllegalArgumentException(numVectors, dims); } @@ -52,7 +52,7 @@ public Dataset fromInput(MemorySegmentAccessInput input, int numVectors, int dim if (((long) numVectors * dims * Float.BYTES) > ms.byteSize()) { throwIllegalArgumentException(ms, numVectors, dims); } - return fromMemorySegment(ms, numVectors, dims); + 
return fromMemorySegment(ms, numVectors, dims, CuVSMatrix.DataType.FLOAT); } static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) { diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java index bfc4ee6d48d0d..0c9c63257c0e8 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java @@ -53,7 +53,7 @@ public void testBasic() throws Exception { var dataset = datasetUtils.fromInput((MemorySegmentAccessInput) in, numVecs, dims) ) { assertEquals(numVecs, dataset.size()); - assertEquals(dims, dataset.dimensions()); + assertEquals(dims, dataset.columns()); } } } diff --git a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java index 0f4a7a059b6d4..4bfaab9243d90 100644 --- a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java +++ b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java @@ -21,6 +21,7 @@ public class GPUClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { .module("gpu") .setting("xpack.license.self_generated.type", "trial") .setting("xpack.security.enabled", "false") + .environment("LD_LIBRARY_PATH", System.getenv("LD_LIBRARY_PATH")) .build(); public GPUClientYamlTestSuiteIT(final ClientYamlTestCandidate testCandidate) { From c38c8aa484dea72a899bb85e862d511aa4072c21 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Fri, 15 Aug 2025 20:35:22 +0000 Subject: [PATCH 029/109] WIP cuvs-snapshot updating --- .buildkite/scripts/cuvs-snapshot/configure.sh | 39 +++++++++++++++++++ .../update-current-snapshot-version.sh | 38 
++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100755 .buildkite/scripts/cuvs-snapshot/configure.sh create mode 100755 .buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh new file mode 100755 index 0000000000000..ee81bd939166f --- /dev/null +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +set -euo pipefail + +if [[ "${BUILDKITE:-}" == "true" ]]; then + export JAVA_HOME="$HOME/.java/openjdk24" + export PATH="$JAVA_HOME/bin:$PATH" + + # Setup LD_LIBRARY_PATH, PATH + if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then + source /etc/profile.d/elastic-nvidia.sh + fi +fi + +CURRENT_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ELASTICSEARCH_REPO_DIR="$(cd "$CURRENT_SCRIPT_DIR/../../.." && pwd)" + +CUVS_SNAPSHOT_VERSION="${CUVS_SNAPSHOT_VERSION:-$(cat "$CURRENT_SCRIPT_DIR"/current-snapshot-version)}" +CUVS_ARCHIVE="cuvs-$CUVS_SNAPSHOT_VERSION.tar.gz" +CUVS_URL="https://storage.googleapis.com/elasticsearch-cuvs-snapshots/$CUVS_ARCHIVE" + +CUVS_WORKSPACE=${CUVS_WORKSPACE:-$(cd "$(mktemp -d)")} +CUVS_DIR="$(pwd)/cuvs-$CUVS_SNAPSHOT_VERSION" + +curl -O "$CUVS_URL" +tar -xzf "$CUVS_ARCHIVE" + +CUVS_VERSION=$(cd "$CUVS_DIR/cuvs-java/target" && mvn help:evaluate -Dexpression=project.version -q -DforceStdout) + +LD_LIBRARY_PATH=$(echo "$LD_LIBRARY_PATH" | tr ':' '\n' | grep -v "libcuvs/linux-x64" | tr '\n' ':' | sed 's/:$//') +LD_LIBRARY_PATH="$CUVS_DIR/libcuvs/linux-x64:$LD_LIBRARY_PATH" +export LD_LIBRARY_PATH + +cd "$CUVS_DIR/cuvs-java/target" +mvn install:install-file -Dfile="cuvs-java-$CUVS_VERSION.jar" + +cd "$ELASTICSEARCH_REPO_DIR" +PLUGIN_GRADLE_FILE=x-pack/plugin/gpu/build.gradle +sed -i "s|implementation 'com.nvidia.cuvs:cuvs-java:.*'|implementation 'com.nvidia.cuvs:cuvs-java:$CUVS_VERSION'|" "$PLUGIN_GRADLE_FILE" diff --git 
a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh new file mode 100755 index 0000000000000..ab01daf5a5ab6 --- /dev/null +++ b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +set -euo pipefail + +SNAPSHOT_VERSION_FILE=.buildkite/scripts/cuvs-snapshot/current-snapshot-version +PLUGIN_GRADLE_FILE=x-pack/plugin/gpu/build.gradle +BRANCH_TO_UPDATE="${BRANCH_TO_UPDATE:-${BUILDKITE_BRANCH:-cuvs-snapshot}}" + +if [[ -z "${CUVS_SNAPSHOT_VERSION:-}" ]]; then + echo "CUVS_SNAPSHOT_VERSION not set. Set this to update the current snapshot version." + exit 1 +fi + +if [[ "$CUVS_SNAPSHOT_VERSION" == "$(cat $SNAPSHOT_VERSION_FILE)" ]]; then + echo "Current snapshot version already set to '$CUVS_SNAPSHOT_VERSION'. No need to update." + exit 0 +fi + +echo "--- Configuring libcuvs/cuvs-java" +source .buildkite/scripts/cuvs-snapshot/configure.sh + +if [[ "${SKIP_TESTING:-}" != "true" ]]; then + echo "--- Testing snapshot before updating" + ./gradlew -Druntime.java=24 :x-pack:plugin:gpu:yamlRestTest -S +fi + +echo "--- Updating snapshot" + +echo "$CUVS_SNAPSHOT_VERSION" > "$SNAPSHOT_VERSION_FILE" + +# CURRENT_SHA="$(gh api "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE?ref=$BRANCH_TO_UPDATE" | jq -r .sha)" +CURRENT_SHA=test + +echo gh api -X PUT "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE" \ + -f branch="$BRANCH_TO_UPDATE" \ + -f message="Update cuvs snapshot version to $CUVS_VERSION" \ + -f content="$(base64 -w 0 "$WORKSPACE/$SNAPSHOT_VERSION_FILE")" \ + -f sha="$CURRENT_SHA" From 66e24c023518018dae47089ab176b11697dfde05 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Fri, 15 Aug 2025 20:37:58 +0000 Subject: [PATCH 030/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 1 + 1 file changed, 1 insertion(+) create mode 100644 
.buildkite/scripts/cuvs-snapshot/current-snapshot-version diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version new file mode 100644 index 0000000000000..3f8919d7a54f3 --- /dev/null +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -0,0 +1 @@ +c987afa2 From eb9dfeb7b80b63db493605517e3dbfc56b770b75 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Fri, 15 Aug 2025 21:03:56 +0000 Subject: [PATCH 031/109] Always update cuvs-java --- .../scripts/cuvs-snapshot/update-current-snapshot-version.sh | 3 +-- x-pack/plugin/gpu/build.gradle | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh index ab01daf5a5ab6..f651d2e2e44cf 100755 --- a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh +++ b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh @@ -28,8 +28,7 @@ echo "--- Updating snapshot" echo "$CUVS_SNAPSHOT_VERSION" > "$SNAPSHOT_VERSION_FILE" -# CURRENT_SHA="$(gh api "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE?ref=$BRANCH_TO_UPDATE" | jq -r .sha)" -CURRENT_SHA=test +CURRENT_SHA="$(gh api "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE?ref=$BRANCH_TO_UPDATE" | jq -r .sha)" || true echo gh api -X PUT "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE" \ -f branch="$BRANCH_TO_UPDATE" \ diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle index 0c4ce21d0fad0..74da7d119444f 100644 --- a/x-pack/plugin/gpu/build.gradle +++ b/x-pack/plugin/gpu/build.gradle @@ -20,7 +20,9 @@ repositories { dependencies { compileOnly project(path: xpackModule('core')) compileOnly project(':server') - implementation 'com.nvidia.cuvs:cuvs-java:25.10.0' + implementation('com.nvidia.cuvs:cuvs-java:25.10.0') { + changing = true // Ensure that we get updates 
even when the version number doesn't change. We can remove this once things stabilize + } testImplementation(testArtifact(project(xpackModule('core')))) testImplementation(testArtifact(project(':server'))) clusterModules project(xpackModule('gpu')) From dddadceb7bf55f484edbcf0669d9e928fdeb40c6 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Mon, 18 Aug 2025 09:20:27 -0400 Subject: [PATCH 032/109] Implement working with sorted index --- .../elasticsearch/plugin/gpu/GPUIndexIT.java | 98 ++++++++++++++++--- .../gpu/codec/GPUToHNSWVectorsWriter.java | 7 +- 2 files changed, 89 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java index f35e1f1d6b659..d1eb51b1f0f00 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java @@ -12,10 +12,12 @@ import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.vectors.KnnSearchBuilder; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.xpack.gpu.GPUPlugin; import org.elasticsearch.xpack.gpu.GPUSupport; +import org.junit.Assert; import java.util.Collection; import java.util.List; @@ -34,40 +36,102 @@ protected Collection> nodePlugins() { public void testBasic() { assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); + String indexName = "index1"; final int dims = randomIntBetween(4, 128); final int[] numDocs = new int[] { randomIntBetween(1, 100), 1, 2, randomIntBetween(1, 100) }; - createIndex(dims); + createIndex(indexName, dims, false); int totalDocs = 0; for (int i = 0; i < numDocs.length; i++) { - indexDocs(numDocs[i], 
dims, i * 100); + indexDocs(indexName, numDocs[i], dims, i * 100); totalDocs += numDocs[i]; } refresh(); - assertSearch(randomFloatVector(dims), totalDocs); + assertSearch(indexName, randomFloatVector(dims), totalDocs); + } + + public void testSortedIndexReturnsSameResultsAsUnsorted() { + assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); + String indexName1 = "index_unsorted"; + String indexName2 = "index_sorted"; + final int dims = randomIntBetween(4, 128); + createIndex(indexName1, dims, false); + createIndex(indexName2, dims, true); + + final int[] numDocs = new int[] { randomIntBetween(50, 100), randomIntBetween(50, 100) }; + for (int i = 0; i < numDocs.length; i++) { + BulkRequestBuilder bulkRequest1 = client().prepareBulk(); + BulkRequestBuilder bulkRequest2 = client().prepareBulk(); + for (int j = 0; j < numDocs[i]; j++) { + String id = String.valueOf(i * 100 + j); + String keywordValue = String.valueOf(numDocs[i] - j); + float[] vector = randomFloatVector(dims); + bulkRequest1.add(prepareIndex(indexName1).setId(id).setSource("my_vector", vector, "my_keyword", keywordValue)); + bulkRequest2.add(prepareIndex(indexName2).setId(id).setSource("my_vector", vector, "my_keyword", keywordValue)); + } + BulkResponse bulkResponse1 = bulkRequest1.get(); + assertFalse("Bulk request failed: " + bulkResponse1.buildFailureMessage(), bulkResponse1.hasFailures()); + BulkResponse bulkResponse2 = bulkRequest2.get(); + assertFalse("Bulk request failed: " + bulkResponse2.buildFailureMessage(), bulkResponse2.hasFailures()); + } + refresh(); + + float[] queryVector = randomFloatVector(dims); + int k = 10; + int numCandidates = k * 10; + + var searchResponse1 = prepareSearch(indexName1).setSize(k) + .setFetchSource(false) + .addFetchField("my_keyword") + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null))) + .get(); + + var searchResponse2 = prepareSearch(indexName2).setSize(k) + .setFetchSource(false) + 
.addFetchField("my_keyword") + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null))) + .get(); + + try { + SearchHit[] hits1 = searchResponse1.getHits().getHits(); + SearchHit[] hits2 = searchResponse2.getHits().getHits(); + Assert.assertEquals(hits1.length, hits2.length); + for (int i = 0; i < hits1.length; i++) { + Assert.assertEquals(hits1[i].getId(), hits2[i].getId()); + Assert.assertEquals((String) hits1[i].field("my_keyword").getValue(), (String) hits2[i].field("my_keyword").getValue()); + Assert.assertEquals(hits1[i].getScore(), hits2[i].getScore(), 0.0001f); + } + } finally { + searchResponse1.decRef(); + searchResponse2.decRef(); + } } public void testSearchWithoutGPU() { assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); + String indexName = "index1"; final int dims = randomIntBetween(4, 128); final int numDocs = randomIntBetween(1, 500); - createIndex(dims); + createIndex(indexName, dims, false); ensureGreen(); - indexDocs(numDocs, dims, 0); + indexDocs(indexName, numDocs, dims, 0); refresh(); // update settings to disable GPU usage Settings.Builder settingsBuilder = Settings.builder().put("index.vectors.indexing.use_gpu", false); - assertAcked(client().admin().indices().prepareUpdateSettings("foo-index").setSettings(settingsBuilder.build())); + assertAcked(client().admin().indices().prepareUpdateSettings(indexName).setSettings(settingsBuilder.build())); ensureGreen(); - assertSearch(randomFloatVector(dims), numDocs); + assertSearch(indexName, randomFloatVector(dims), numDocs); } - private void createIndex(int dims) { + private void createIndex(String indexName, int dims, boolean sorted) { var settings = Settings.builder().put(indexSettings()); settings.put("index.number_of_shards", 1); settings.put("index.vectors.indexing.use_gpu", true); - assertAcked(prepareCreate("foo-index").setSettings(settings.build()).setMapping(String.format(Locale.ROOT, """ + if (sorted) { + 
settings.put("index.sort.field", "my_keyword"); + } + assertAcked(prepareCreate(indexName).setSettings(settings.build()).setMapping(String.format(Locale.ROOT, """ { "properties": { "my_vector": { @@ -77,6 +141,9 @@ private void createIndex(int dims) { "index_options": { "type": "hnsw" } + }, + "my_keyword": { + "type": "keyword" } } } @@ -84,21 +151,26 @@ private void createIndex(int dims) { ensureGreen(); } - private void indexDocs(int numDocs, int dims, int startDoc) { + private void indexDocs(String indexName, int numDocs, int dims, int startDoc) { BulkRequestBuilder bulkRequest = client().prepareBulk(); for (int i = 0; i < numDocs; i++) { String id = String.valueOf(startDoc + i); - bulkRequest.add(prepareIndex("foo-index").setId(id).setSource("my_vector", randomFloatVector(dims))); + String keywordValue = String.valueOf(numDocs - i); + var indexRequest = prepareIndex(indexName).setId(id) + .setSource("my_vector", randomFloatVector(dims), "my_keyword", keywordValue); + bulkRequest.add(indexRequest); } BulkResponse bulkResponse = bulkRequest.get(); assertFalse("Bulk request failed: " + bulkResponse.buildFailureMessage(), bulkResponse.hasFailures()); } - private void assertSearch(float[] queryVector, int totalDocs) { + private void assertSearch(String indexName, float[] queryVector, int totalDocs) { int k = Math.min(randomIntBetween(1, 20), totalDocs); int numCandidates = k * 10; assertNoFailuresAndResponse( - prepareSearch("foo-index").setSize(k) + prepareSearch(indexName).setSize(k) + .setFetchSource(false) + .addFetchField("my_keyword") .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null))), response -> { assertEquals("Expected k hits to be returned", k, response.getHits().getHits().length); diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java index 
73e11dd24f4d5..263d1b6572f5b 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -221,9 +221,10 @@ private void writeField(FieldWriter fieldWriter) throws IOException { } private void writeSortingField(FieldWriter fieldData, Sorter.DocMap sortMap) throws IOException { - // TODO: implement writing sorted field when we can access cagra index through MemorySegment - // as we need random access to neighbors in the graph. - throw new UnsupportedOperationException("Writing field with index sorted needs to be implemented."); + // The flatFieldVectorsWriter's flush method, called before this, has already sorted the vectors according to the sortMap. + // We can now treat them as a simple, sorted list of vectors. + float[][] vectors = fieldData.flatFieldVectorsWriter.getVectors().toArray(float[][]::new); + writeFieldInternal(fieldData.fieldInfo, DatasetOrVectors.fromArray(vectors)); } private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrVectors) throws IOException { From 9c1db2d0e921d23fdcb8933119f48d92eb375645 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 19 Aug 2025 07:05:29 -0400 Subject: [PATCH 033/109] Expose m and efConstruction params in GPU index building --- .../vectors/DenseVectorFieldMapper.java | 10 +++++- .../elasticsearch/xpack/gpu/GPUPlugin.java | 21 ++++++++++-- .../gpu/codec/GPUToHNSWVectorsWriter.java | 7 ++-- .../xpack/gpu/codec/GPUVectorsFormat.java | 33 +++++++++---------- .../gpu/codec/GPUVectorsFormatTests.java | 17 +--------- 5 files changed, 49 insertions(+), 39 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index ea21772c2dab8..48af91065aca9 100644 --- 
a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -2117,7 +2117,7 @@ public boolean updatableTo(DenseVectorIndexOptions update) { } } - static class HnswIndexOptions extends DenseVectorIndexOptions { + public static class HnswIndexOptions extends DenseVectorIndexOptions { private final int m; private final int efConstruction; @@ -2160,6 +2160,14 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } + public int m() { + return m; + } + + public int efConstruction() { + return efConstruction; + } + @Override public boolean doEquals(DenseVectorIndexOptions o) { if (this == o) return true; diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index eb7d3b4f594d2..f376e19a6d980 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -6,6 +6,8 @@ */ package org.elasticsearch.xpack.gpu; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.util.hnsw.HnswGraphBuilder; import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; @@ -37,12 +39,12 @@ public VectorsFormatProvider getVectorsFormatProvider() { "[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node." 
); } - return new GPUVectorsFormat(); + return getVectorsFormat(indexOptions); } if (gpuMode == IndexSettings.GpuMode.AUTO && vectorIndexTypeSupported(indexOptions.getType()) && GPUSupport.isSupported(false)) { - return new GPUVectorsFormat(); + return getVectorsFormat(indexOptions); } } return null; @@ -52,4 +54,19 @@ && vectorIndexTypeSupported(indexOptions.getType()) private boolean vectorIndexTypeSupported(DenseVectorFieldMapper.VectorIndexType type) { return type == DenseVectorFieldMapper.VectorIndexType.HNSW; } + + private static KnnVectorsFormat getVectorsFormat(DenseVectorFieldMapper.DenseVectorIndexOptions indexOptions) { + if (indexOptions.getType() == DenseVectorFieldMapper.VectorIndexType.HNSW) { + DenseVectorFieldMapper.HnswIndexOptions hnswIndexOptions = (DenseVectorFieldMapper.HnswIndexOptions) indexOptions; + int efConstruction = hnswIndexOptions.efConstruction(); + if (efConstruction == HnswGraphBuilder.DEFAULT_BEAM_WIDTH) { + efConstruction = GPUVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128 + } + return new GPUVectorsFormat(hnswIndexOptions.m(), efConstruction); + } else { + throw new IllegalArgumentException( + "GPU vector indexing is not supported on this vector type: [" + indexOptions.getType() + "]" + ); + } + } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java index 263d1b6572f5b..4968a659e73e7 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java @@ -86,8 +86,8 @@ final class GPUToHNSWVectorsWriter extends KnnVectorsWriter { assert cuVSResourceManager != null : "CuVSResources must not be null"; this.cuVSResourceManager = cuVSResourceManager; this.M = M; - this.flatVectorWriter = flatVectorWriter; 
this.beamWidth = beamWidth; + this.flatVectorWriter = flatVectorWriter; this.segmentWriteState = state; String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, LUCENE99_HNSW_META_EXTENSION); String indexDataFileName = IndexFileNames.segmentFileName( @@ -274,10 +274,11 @@ private CagraIndex buildGPUIndex( case COSINE -> CagraIndexParams.CuvsDistanceType.CosineExpanded; }; - // TODO: expose cagra index params of intermediate graph degree, graph degree, algorithm, NNDescentNumIterations + // TODO: expose cagra index params for algorithm, NNDescentNumIterations CagraIndexParams params = new CagraIndexParams.Builder().withNumWriterThreads(1) // TODO: how many CPU threads we can use? .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.NN_DESCENT) - .withGraphDegree(16) + .withGraphDegree(M) + .withIntermediateGraphDegree(beamWidth) .withMetric(distanceType) .build(); diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java index 8620795ddff41..8a6243d40410f 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java @@ -16,8 +16,6 @@ import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.elasticsearch.logging.LogManager; -import org.elasticsearch.logging.Logger; import java.io.IOException; @@ -26,9 +24,6 @@ * leverage GPU processing capabilities for vector search operations. 
*/ public class GPUVectorsFormat extends KnnVectorsFormat { - - private static final Logger LOG = LogManager.getLogger(GPUVectorsFormat.class); - public static final String NAME = "GPUVectorsFormat"; public static final int VERSION_START = 0; @@ -38,34 +33,38 @@ public class GPUVectorsFormat extends KnnVectorsFormat { static final String LUCENE99_HNSW_VECTOR_INDEX_EXTENSION = "vex"; static final int LUCENE99_VERSION_CURRENT = VERSION_START; - static final int DEFAULT_MAX_CONN = 16; - static final int DEFAULT_BEAM_WIDTH = 100; + static final int DEFAULT_MAX_CONN = 16; // graph degree + public static final int DEFAULT_BEAM_WIDTH = 128; // intermediate graph degree static final int MIN_NUM_VECTORS_FOR_GPU_BUILD = 2; private static final FlatVectorsFormat flatVectorsFormat = new Lucene99FlatVectorsFormat( FlatVectorScorerUtil.getLucene99FlatVectorsScorer() ); + // How many nodes each node in the graph is connected to in the final graph + private final int maxConn; + // Intermediate graph degree, the number of connections for each node before pruning + private final int beamWidth; final CuVSResourceManager cuVSResourceManager; public GPUVectorsFormat() { - this(CuVSResourceManager.pooling()); + this(CuVSResourceManager.pooling(), DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH); } - public GPUVectorsFormat(CuVSResourceManager cuVSResourceManager) { + public GPUVectorsFormat(int maxConn, int beamWidth) { + this(CuVSResourceManager.pooling(), maxConn, beamWidth); + }; + + public GPUVectorsFormat(CuVSResourceManager cuVSResourceManager, int maxConn, int beamWidth) { super(NAME); this.cuVSResourceManager = cuVSResourceManager; + this.maxConn = maxConn; + this.beamWidth = beamWidth; } @Override public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new GPUToHNSWVectorsWriter( - cuVSResourceManager, - state, - DEFAULT_MAX_CONN, - DEFAULT_BEAM_WIDTH, - flatVectorsFormat.fieldsWriter(state) - ); + return new GPUToHNSWVectorsWriter(cuVSResourceManager, 
state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state)); } @Override @@ -80,6 +79,6 @@ public int getMaxDimensions(String fieldName) { @Override public String toString() { - return NAME + "()"; + return NAME + "(maxConn=" + maxConn + ", beamWidth=" + beamWidth + ", flatVectorFormat=" + flatVectorsFormat.getName() + ")"; } } diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java index b1fcc1b01b58e..8fd81ad75c994 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java @@ -76,21 +76,6 @@ public void testMismatchedFields() throws Exception { // No bytes support } - @Override - public void testSortedIndex() throws Exception { - // TODO: implement sorted index - } - - @Override - public void testFloatVectorScorerIteration() throws Exception { - // TODO: implement sorted index - } - - @Override - public void testRandom() throws Exception { - // TODO: implement sorted index - } - public void testToString() { FilterCodec customCodec = new FilterCodec("foo", Codec.getDefault()) { @Override @@ -98,7 +83,7 @@ public KnnVectorsFormat knnVectorsFormat() { return new GPUVectorsFormat(); } }; - String expectedPattern = "GPUVectorsFormat()"; + String expectedPattern = "GPUVectorsFormat(maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)"; assertEquals(expectedPattern, customCodec.knnVectorsFormat().toString()); } From f452dfc331ccfd00f65e23ef89d86f5d6df22bdb Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 19 Aug 2025 13:54:51 -0400 Subject: [PATCH 034/109] Add yml test --- .../test/gpu/{10_basic.yml => 10_hnsw.yml} | 15 +++++++++++++++ 1 file changed, 15 insertions(+) rename 
x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/{10_basic.yml => 10_hnsw.yml} (90%) diff --git a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_basic.yml b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml similarity index 90% rename from x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_basic.yml rename to x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml index 02eea5b756938..978eaa354a4cd 100644 --- a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_basic.yml +++ b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml @@ -99,3 +99,18 @@ k: 10 - match: { hits.total.value: 5 } - match: { hits.hits.0._id: "2" } + + - do: + indices.forcemerge: + index: my_vectors + max_num_segments: 1 + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] + k: 10 + - match: { hits.total.value: 5 } From 47d58d098824171b007d42a1631b842ab6857530 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Tue, 19 Aug 2025 19:42:03 +0000 Subject: [PATCH 035/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 3f8919d7a54f3..e2bd27cd2cb82 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -c987afa2 +e58b147d From 6c7f49f99002de07bcf07ddb331e97dcdefbb0eb Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Tue, 19 Aug 2025 19:43:58 +0000 Subject: [PATCH 036/109] Working script to configure cuvs snapshot and update current one --- .buildkite/scripts/cuvs-snapshot/configure.sh | 34 +++++++++++++------ .../update-current-snapshot-version.sh | 6 +++- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh index ee81bd939166f..79f04249a9d01 100755 --- a/.buildkite/scripts/cuvs-snapshot/configure.sh +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -2,16 +2,19 @@ set -euo pipefail -if [[ "${BUILDKITE:-}" == "true" ]]; then +if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then export JAVA_HOME="$HOME/.java/openjdk24" export PATH="$JAVA_HOME/bin:$PATH" # Setup LD_LIBRARY_PATH, PATH - if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then - source /etc/profile.d/elastic-nvidia.sh - fi + source /etc/profile.d/elastic-nvidia.sh fi +CUVS_WORKSPACE=/opt/elastic-cuvs +sudo rm -rf "$CUVS_WORKSPACE/cuvs" +sudo mkdir "$CUVS_WORKSPACE" +sudo chmod 777 "$CUVS_WORKSPACE" + CURRENT_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ELASTICSEARCH_REPO_DIR="$(cd "$CURRENT_SCRIPT_DIR/../../.." 
&& pwd)" @@ -19,19 +22,30 @@ CUVS_SNAPSHOT_VERSION="${CUVS_SNAPSHOT_VERSION:-$(cat "$CURRENT_SCRIPT_DIR"/curr CUVS_ARCHIVE="cuvs-$CUVS_SNAPSHOT_VERSION.tar.gz" CUVS_URL="https://storage.googleapis.com/elasticsearch-cuvs-snapshots/$CUVS_ARCHIVE" -CUVS_WORKSPACE=${CUVS_WORKSPACE:-$(cd "$(mktemp -d)")} -CUVS_DIR="$(pwd)/cuvs-$CUVS_SNAPSHOT_VERSION" +# CUVS_WORKSPACE=${CUVS_WORKSPACE:-$(cd "$(mktemp -d)" && pwd)} +cd "$CUVS_WORKSPACE" +CUVS_DIR="$CUVS_WORKSPACE"/cuvs +mkdir -p "$CUVS_DIR" + +CUVS_SNAPSHOT_DIR="$CUVS_WORKSPACE/cuvs-$CUVS_SNAPSHOT_VERSION" curl -O "$CUVS_URL" tar -xzf "$CUVS_ARCHIVE" -CUVS_VERSION=$(cd "$CUVS_DIR/cuvs-java/target" && mvn help:evaluate -Dexpression=project.version -q -DforceStdout) +mv "$CUVS_SNAPSHOT_DIR"/cuvs "$CUVS_WORKSPACE/" + +CUVS_VERSION=$(cd "$CUVS_DIR/java/cuvs-java/target" && mvn help:evaluate -Dexpression=project.version -q -DforceStdout) + +LD_LIBRARY_PATH=$(echo "$LD_LIBRARY_PATH" | tr ':' '\n' | grep -v "cuvs/" | tr '\n' ':' | sed 's/:$//') +LD_LIBRARY_PATH="\ +${CUVS_DIR}/cpp/build/install/lib:\ +${CUVS_DIR}/cpp/build/_deps/rapids_logger-build:\ +${CUVS_DIR}/cpp/build/_deps/rmm-build:\ +$LD_LIBRARY_PATH" -LD_LIBRARY_PATH=$(echo "$LD_LIBRARY_PATH" | tr ':' '\n' | grep -v "libcuvs/linux-x64" | tr '\n' ':' | sed 's/:$//') -LD_LIBRARY_PATH="$CUVS_DIR/libcuvs/linux-x64:$LD_LIBRARY_PATH" export LD_LIBRARY_PATH -cd "$CUVS_DIR/cuvs-java/target" +cd "$CUVS_DIR/java/cuvs-java/target" mvn install:install-file -Dfile="cuvs-java-$CUVS_VERSION.jar" cd "$ELASTICSEARCH_REPO_DIR" diff --git a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh index f651d2e2e44cf..ec65ca7878b1d 100755 --- a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh +++ b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh @@ -2,6 +2,7 @@ set -euo pipefail +WORKSPACE="${WORKSPACE:-$(pwd)}" 
SNAPSHOT_VERSION_FILE=.buildkite/scripts/cuvs-snapshot/current-snapshot-version PLUGIN_GRADLE_FILE=x-pack/plugin/gpu/build.gradle BRANCH_TO_UPDATE="${BRANCH_TO_UPDATE:-${BUILDKITE_BRANCH:-cuvs-snapshot}}" @@ -19,6 +20,9 @@ fi echo "--- Configuring libcuvs/cuvs-java" source .buildkite/scripts/cuvs-snapshot/configure.sh +echo JAVA_HOME=$JAVA_HOME; +echo LD_LIBRARY_PATH=$LD_LIBRARY_PATH; + if [[ "${SKIP_TESTING:-}" != "true" ]]; then echo "--- Testing snapshot before updating" ./gradlew -Druntime.java=24 :x-pack:plugin:gpu:yamlRestTest -S @@ -30,7 +34,7 @@ echo "$CUVS_SNAPSHOT_VERSION" > "$SNAPSHOT_VERSION_FILE" CURRENT_SHA="$(gh api "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE?ref=$BRANCH_TO_UPDATE" | jq -r .sha)" || true -echo gh api -X PUT "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE" \ +gh api -X PUT "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE" \ -f branch="$BRANCH_TO_UPDATE" \ -f message="Update cuvs snapshot version to $CUVS_VERSION" \ -f content="$(base64 -w 0 "$WORKSPACE/$SNAPSHOT_VERSION_FILE")" \ From 0145af05acd7595b689abc223e737d7f5b69ed5c Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Tue, 19 Aug 2025 20:08:59 +0000 Subject: [PATCH 037/109] Update cuvs snapshot version to 25.10.0 From e778480d25438fa2635c19598e2d836912adf7fb Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Tue, 19 Aug 2025 20:11:08 +0000 Subject: [PATCH 038/109] Update cuvs snapshot version to 25.10.0 From 9c8417cc185471e1bb75362a5977c3856af873ca Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Tue, 19 Aug 2025 20:35:05 +0000 Subject: [PATCH 039/109] Update cuvs snapshot version to 25.10.0 From 7e0cc3d6e194750c4f537626348088712021db6f Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 20 Aug 2025 11:50:08 -0400 Subject: [PATCH 040/109] Rename GpuVectorsFormat to EsGpuHnswVectorsFormat We want to introduce a new int8 format for GPU, with its anticipation rename GpuVectorsFormat to EsGpuHnswVectorsFormat --- 
.../test/knn/KnnIndexTester.java | 4 ++-- .../plugin/gpu/src/main/java/module-info.java | 2 +- .../elasticsearch/xpack/gpu/GPUPlugin.java | 6 +++--- ...ormat.java => ESGpuHnswVectorsFormat.java} | 16 ++++++++------- ...riter.java => ESGpuHnswVectorsWriter.java} | 20 +++++++++---------- .../org.apache.lucene.codecs.KnnVectorsFormat | 2 +- ....java => ESGpuHnswVectorsFormatTests.java} | 8 ++++---- .../codec/GPUDenseVectorFieldMapperTests.java | 4 ++-- 8 files changed, 32 insertions(+), 30 deletions(-) rename x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/{GPUVectorsFormat.java => ESGpuHnswVectorsFormat.java} (85%) rename x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/{GPUToHNSWVectorsWriter.java => ESGpuHnswVectorsWriter.java} (96%) rename x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/{GPUVectorsFormatTests.java => ESGpuHnswVectorsFormatTests.java} (89%) diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java index 58aae61ea6892..c35c8644292e2 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java @@ -31,7 +31,7 @@ import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.XContentType; -import org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; +import org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat; import java.io.InputStream; import java.lang.management.ThreadInfo; @@ -95,7 +95,7 @@ static Codec createCodec(CmdLineArgs args) { if (args.indexType() == IndexType.IVF) { format = new IVFVectorsFormat(args.ivfClusterSize()); } else if (args.indexType() == IndexType.GPU) { - format = new GPUVectorsFormat(); + format = new ESGpuHnswVectorsFormat(); } else { if (args.quantizeBits() == 1) { if (args.indexType() == 
IndexType.FLAT) { diff --git a/x-pack/plugin/gpu/src/main/java/module-info.java b/x-pack/plugin/gpu/src/main/java/module-info.java index d08bf6b101a4c..c3d67e755e04f 100644 --- a/x-pack/plugin/gpu/src/main/java/module-info.java +++ b/x-pack/plugin/gpu/src/main/java/module-info.java @@ -16,6 +16,6 @@ exports org.elasticsearch.xpack.gpu.codec; - provides org.apache.lucene.codecs.KnnVectorsFormat with org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; + provides org.apache.lucene.codecs.KnnVectorsFormat with org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat; provides org.elasticsearch.features.FeatureSpecification with org.elasticsearch.xpack.gpu.GPUFeatures; } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index f376e19a6d980..a729e2582da42 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -14,7 +14,7 @@ import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat; +import org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat; public class GPUPlugin extends Plugin implements MapperPlugin { @@ -60,9 +60,9 @@ private static KnnVectorsFormat getVectorsFormat(DenseVectorFieldMapper.DenseVec DenseVectorFieldMapper.HnswIndexOptions hnswIndexOptions = (DenseVectorFieldMapper.HnswIndexOptions) indexOptions; int efConstruction = hnswIndexOptions.efConstruction(); if (efConstruction == HnswGraphBuilder.DEFAULT_BEAM_WIDTH) { - efConstruction = GPUVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128 + efConstruction = ESGpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128 } - return new 
GPUVectorsFormat(hnswIndexOptions.m(), efConstruction); + return new ESGpuHnswVectorsFormat(hnswIndexOptions.m(), efConstruction); } else { throw new IllegalArgumentException( "GPU vector indexing is not supported on this vector type: [" + indexOptions.getType() + "]" diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java similarity index 85% rename from x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java rename to x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java index 8a6243d40410f..474e035f2d160 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java @@ -19,12 +19,14 @@ import java.io.IOException; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; + /** * Codec format for GPU-accelerated vector indexes. This format is designed to * leverage GPU processing capabilities for vector search operations. 
*/ -public class GPUVectorsFormat extends KnnVectorsFormat { - public static final String NAME = "GPUVectorsFormat"; +public class ESGpuHnswVectorsFormat extends KnnVectorsFormat { + public static final String NAME = "ESGpuHnswVectorsFormat"; public static final int VERSION_START = 0; static final String LUCENE99_HNSW_META_CODEC_NAME = "Lucene99HnswVectorsFormatMeta"; @@ -47,15 +49,15 @@ public class GPUVectorsFormat extends KnnVectorsFormat { private final int beamWidth; final CuVSResourceManager cuVSResourceManager; - public GPUVectorsFormat() { + public ESGpuHnswVectorsFormat() { this(CuVSResourceManager.pooling(), DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH); } - public GPUVectorsFormat(int maxConn, int beamWidth) { + public ESGpuHnswVectorsFormat(int maxConn, int beamWidth) { this(CuVSResourceManager.pooling(), maxConn, beamWidth); }; - public GPUVectorsFormat(CuVSResourceManager cuVSResourceManager, int maxConn, int beamWidth) { + public ESGpuHnswVectorsFormat(CuVSResourceManager cuVSResourceManager, int maxConn, int beamWidth) { super(NAME); this.cuVSResourceManager = cuVSResourceManager; this.maxConn = maxConn; @@ -64,7 +66,7 @@ public GPUVectorsFormat(CuVSResourceManager cuVSResourceManager, int maxConn, in @Override public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new GPUToHNSWVectorsWriter(cuVSResourceManager, state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state)); + return new ESGpuHnswVectorsWriter(cuVSResourceManager, state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state)); } @Override @@ -74,7 +76,7 @@ public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException @Override public int getMaxDimensions(String fieldName) { - return 4096; + return MAX_DIMS_COUNT; } @Override diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java 
similarity index 96% rename from x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java rename to x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index 4968a659e73e7..3335bd353e1c1 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/GPUToHNSWVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -50,20 +50,20 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; -import static org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.LUCENE99_HNSW_META_CODEC_NAME; -import static org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.LUCENE99_HNSW_META_EXTENSION; -import static org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME; -import static org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_EXTENSION; -import static org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.LUCENE99_VERSION_CURRENT; -import static org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat.MIN_NUM_VECTORS_FOR_GPU_BUILD; +import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_META_CODEC_NAME; +import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_META_EXTENSION; +import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME; +import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_EXTENSION; +import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_VERSION_CURRENT; +import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.MIN_NUM_VECTORS_FOR_GPU_BUILD; /** * Writer that builds a Nvidia Carga Graph on GPU and than writes it into the Lucene99 HNSW format, * so that it can be 
searched on CPU with Lucene99HNSWVectorReader. */ -final class GPUToHNSWVectorsWriter extends KnnVectorsWriter { - private static final Logger logger = LogManager.getLogger(GPUToHNSWVectorsWriter.class); - private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(GPUToHNSWVectorsWriter.class); +final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { + private static final Logger logger = LogManager.getLogger(ESGpuHnswVectorsWriter.class); + private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ESGpuHnswVectorsWriter.class); private static final int LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT = 16; private final CuVSResourceManager cuVSResourceManager; @@ -76,7 +76,7 @@ final class GPUToHNSWVectorsWriter extends KnnVectorsWriter { private final List fields = new ArrayList<>(); private boolean finished; - GPUToHNSWVectorsWriter( + ESGpuHnswVectorsWriter( CuVSResourceManager cuVSResourceManager, SegmentWriteState state, int M, diff --git a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat index 00aa6aa7a153c..7489c844fb527 100644 --- a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat +++ b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -1,2 +1,2 @@ -org.elasticsearch.xpack.gpu.codec.GPUVectorsFormat +org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java similarity index 89% rename from x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java rename to 
x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java index 8fd81ad75c994..7d1f3f8283bf3 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java @@ -17,7 +17,7 @@ import org.elasticsearch.xpack.gpu.GPUSupport; import org.junit.BeforeClass; -public class GPUVectorsFormatTests extends BaseKnnVectorsFormatTestCase { +public class ESGpuHnswVectorsFormatTests extends BaseKnnVectorsFormatTestCase { static { LogConfigurator.loadLog4jPlugins(); @@ -29,7 +29,7 @@ public static void beforeClass() { assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); } - static final Codec codec = TestUtil.alwaysKnnVectorsFormat(new GPUVectorsFormat()); + static final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ESGpuHnswVectorsFormat()); @Override protected Codec getCodec() { @@ -80,10 +80,10 @@ public void testToString() { FilterCodec customCodec = new FilterCodec("foo", Codec.getDefault()) { @Override public KnnVectorsFormat knnVectorsFormat() { - return new GPUVectorsFormat(); + return new ESGpuHnswVectorsFormat(); } }; - String expectedPattern = "GPUVectorsFormat(maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)"; + String expectedPattern = "ESGpuHnswVectorsFormat(maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)"; assertEquals(expectedPattern, customCodec.knnVectorsFormat().toString()); } diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java index 1a1ecf9f85ec4..6703429318946 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java +++ 
b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java @@ -39,7 +39,7 @@ protected Collection getPlugins() { return Collections.singletonList(plugin); } - public void testKnnGPUVectorsFormat() throws IOException { + public void testKnnESGPUHnswVectorsFormat() throws IOException { final int dims = randomIntBetween(128, 4096); MapperService mapperService = createMapperService(fieldMapping(b -> { b.field("type", "dense_vector"); @@ -63,7 +63,7 @@ public void testKnnGPUVectorsFormat() throws IOException { assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } - String expectedString = "GPUVectorsFormat()"; + String expectedString = "ESGpuHnswVectorsFormat(maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)"; assertEquals(expectedString, knnVectorsFormat.toString()); } } From b03d2be95ac5d25d969897f4d7f1214ce9a3f26f Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Wed, 20 Aug 2025 15:11:51 -0400 Subject: [PATCH 041/109] Revert some of the changes made while testing bug fixes, add nvidia-smi command, add run-gradle script --- .buildkite/scripts/cuvs-snapshot/configure.sh | 30 ++++++------------- .../cuvs-snapshot/current-snapshot-version | 2 +- .../scripts/cuvs-snapshot/run-gradle.sh | 9 ++++++ .../update-current-snapshot-version.sh | 6 +--- 4 files changed, 20 insertions(+), 27 deletions(-) create mode 100755 .buildkite/scripts/cuvs-snapshot/run-gradle.sh diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh index 79f04249a9d01..6563f15fde1db 100755 --- a/.buildkite/scripts/cuvs-snapshot/configure.sh +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -10,10 +10,9 @@ if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then source /etc/profile.d/elastic-nvidia.sh fi -CUVS_WORKSPACE=/opt/elastic-cuvs -sudo rm -rf "$CUVS_WORKSPACE/cuvs" -sudo mkdir 
"$CUVS_WORKSPACE" -sudo chmod 777 "$CUVS_WORKSPACE" +# Not running this before the tests results in an error when running the tests +# No idea why... +nvidia-smi CURRENT_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ELASTICSEARCH_REPO_DIR="$(cd "$CURRENT_SCRIPT_DIR/../../.." && pwd)" @@ -22,30 +21,19 @@ CUVS_SNAPSHOT_VERSION="${CUVS_SNAPSHOT_VERSION:-$(cat "$CURRENT_SCRIPT_DIR"/curr CUVS_ARCHIVE="cuvs-$CUVS_SNAPSHOT_VERSION.tar.gz" CUVS_URL="https://storage.googleapis.com/elasticsearch-cuvs-snapshots/$CUVS_ARCHIVE" -# CUVS_WORKSPACE=${CUVS_WORKSPACE:-$(cd "$(mktemp -d)" && pwd)} -cd "$CUVS_WORKSPACE" -CUVS_DIR="$CUVS_WORKSPACE"/cuvs -mkdir -p "$CUVS_DIR" - -CUVS_SNAPSHOT_DIR="$CUVS_WORKSPACE/cuvs-$CUVS_SNAPSHOT_VERSION" +CUVS_WORKSPACE=${CUVS_WORKSPACE:-$(cd "$(mktemp -d)")} +CUVS_DIR="$(pwd)/cuvs-$CUVS_SNAPSHOT_VERSION" curl -O "$CUVS_URL" tar -xzf "$CUVS_ARCHIVE" -mv "$CUVS_SNAPSHOT_DIR"/cuvs "$CUVS_WORKSPACE/" - -CUVS_VERSION=$(cd "$CUVS_DIR/java/cuvs-java/target" && mvn help:evaluate -Dexpression=project.version -q -DforceStdout) - -LD_LIBRARY_PATH=$(echo "$LD_LIBRARY_PATH" | tr ':' '\n' | grep -v "cuvs/" | tr '\n' ':' | sed 's/:$//') -LD_LIBRARY_PATH="\ -${CUVS_DIR}/cpp/build/install/lib:\ -${CUVS_DIR}/cpp/build/_deps/rapids_logger-build:\ -${CUVS_DIR}/cpp/build/_deps/rmm-build:\ -$LD_LIBRARY_PATH" +CUVS_VERSION=$(cd "$CUVS_DIR/cuvs-java/target" && mvn help:evaluate -Dexpression=project.version -q -DforceStdout) +LD_LIBRARY_PATH=$(echo "$LD_LIBRARY_PATH" | tr ':' '\n' | grep -v "libcuvs/linux-x64" | tr '\n' ':' | sed 's/:$//') +LD_LIBRARY_PATH="$CUVS_DIR/libcuvs/linux-x64:$LD_LIBRARY_PATH" export LD_LIBRARY_PATH -cd "$CUVS_DIR/java/cuvs-java/target" +cd "$CUVS_DIR/cuvs-java/target" mvn install:install-file -Dfile="cuvs-java-$CUVS_VERSION.jar" cd "$ELASTICSEARCH_REPO_DIR" diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index e2bd27cd2cb82..3f8919d7a54f3 100644 --- 
a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -e58b147d +c987afa2 diff --git a/.buildkite/scripts/cuvs-snapshot/run-gradle.sh b/.buildkite/scripts/cuvs-snapshot/run-gradle.sh new file mode 100755 index 0000000000000..4824981f5817f --- /dev/null +++ b/.buildkite/scripts/cuvs-snapshot/run-gradle.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -euo pipefail + +source .buildkite/scripts/cuvs-snapshot/configure.sh + +cd "$WORKSPACE" + +.ci/scripts/run-gradle.sh "$@" diff --git a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh index ec65ca7878b1d..f651d2e2e44cf 100755 --- a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh +++ b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh @@ -2,7 +2,6 @@ set -euo pipefail -WORKSPACE="${WORKSPACE:-$(pwd)}" SNAPSHOT_VERSION_FILE=.buildkite/scripts/cuvs-snapshot/current-snapshot-version PLUGIN_GRADLE_FILE=x-pack/plugin/gpu/build.gradle BRANCH_TO_UPDATE="${BRANCH_TO_UPDATE:-${BUILDKITE_BRANCH:-cuvs-snapshot}}" @@ -20,9 +19,6 @@ fi echo "--- Configuring libcuvs/cuvs-java" source .buildkite/scripts/cuvs-snapshot/configure.sh -echo JAVA_HOME=$JAVA_HOME; -echo LD_LIBRARY_PATH=$LD_LIBRARY_PATH; - if [[ "${SKIP_TESTING:-}" != "true" ]]; then echo "--- Testing snapshot before updating" ./gradlew -Druntime.java=24 :x-pack:plugin:gpu:yamlRestTest -S @@ -34,7 +30,7 @@ echo "$CUVS_SNAPSHOT_VERSION" > "$SNAPSHOT_VERSION_FILE" CURRENT_SHA="$(gh api "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE?ref=$BRANCH_TO_UPDATE" | jq -r .sha)" || true -gh api -X PUT "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE" \ +echo gh api -X PUT "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE" \ -f branch="$BRANCH_TO_UPDATE" \ -f message="Update cuvs snapshot version to $CUVS_VERSION" \ -f content="$(base64 -w 0 
"$WORKSPACE/$SNAPSHOT_VERSION_FILE")" \ From 0d51a86316c535353fc96709ac95f15fc3f65769 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 20 Aug 2025 16:22:53 -0400 Subject: [PATCH 042/109] Add int8_hnsw option --- server/src/main/java/module-info.java | 2 +- .../vectors/DenseVectorFieldMapper.java | 12 +++ .../plugin/gpu/src/main/java/module-info.java | 7 +- .../elasticsearch/xpack/gpu/GPUPlugin.java | 21 +++-- .../gpu/codec/ESGpuHnswSQVectorsFormat.java | 90 +++++++++++++++++++ .../gpu/codec/ESGpuHnswVectorsFormat.java | 11 ++- .../org.apache.lucene.codecs.KnnVectorsFormat | 1 + .../codec/ESGpuHnswSQVectorsFormatTests.java | 76 ++++++++++++++++ .../codec/ESGpuHnswVectorsFormatTests.java | 13 --- .../codec/GPUDenseVectorFieldMapperTests.java | 25 ++++-- 10 files changed, 229 insertions(+), 29 deletions(-) create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormat.java create mode 100644 x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 90cd3c669a52c..91485db30d78c 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -482,7 +482,7 @@ exports org.elasticsearch.plugins.internal.rewriter to org.elasticsearch.inference; exports org.elasticsearch.lucene.util.automaton; exports org.elasticsearch.index.codec.perfield; - exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn; + exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn, org.elasticsearch.gpu; exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn; exports org.elasticsearch.inference.telemetry; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index
48af91065aca9..35c9caae78017 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -2082,6 +2082,18 @@ boolean isFlat() { return false; } + public int m() { + return m; + } + + public int efConstruction() { + return efConstruction; + } + + public Float confidenceInterval() { + return confidenceInterval; + } + @Override public String toString() { return "{type=" diff --git a/x-pack/plugin/gpu/src/main/java/module-info.java b/x-pack/plugin/gpu/src/main/java/module-info.java index c3d67e755e04f..a5cc05edc6c92 100644 --- a/x-pack/plugin/gpu/src/main/java/module-info.java +++ b/x-pack/plugin/gpu/src/main/java/module-info.java @@ -5,7 +5,7 @@ * 2.0. */ -/** Provides GPU-accelerated support for vector search. */ +/** Provides GPU-accelerated support for vector indexing. */ module org.elasticsearch.gpu { requires org.elasticsearch.logging; requires org.apache.lucene.core; @@ -16,6 +16,9 @@ exports org.elasticsearch.xpack.gpu.codec; - provides org.apache.lucene.codecs.KnnVectorsFormat with org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat; provides org.elasticsearch.features.FeatureSpecification with org.elasticsearch.xpack.gpu.GPUFeatures; + provides org.apache.lucene.codecs.KnnVectorsFormat + with + org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat, + org.elasticsearch.xpack.gpu.codec.ESGpuHnswSQVectorsFormat; } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index a729e2582da42..cd469b2d25828 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -14,6 +14,7 @@ import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.plugins.MapperPlugin; import 
org.elasticsearch.plugins.Plugin; +import org.elasticsearch.xpack.gpu.codec.ESGpuHnswSQVectorsFormat; import org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat; public class GPUPlugin extends Plugin implements MapperPlugin { @@ -28,10 +29,7 @@ public VectorsFormatProvider getVectorsFormatProvider() { if (gpuMode == IndexSettings.GpuMode.TRUE) { if (vectorIndexTypeSupported(indexOptions.getType()) == false) { throw new IllegalArgumentException( - "[index.vectors.indexing.use_gpu] was set to [true], but GPU vector indexing is only supported " - + "for [hnsw] index_options.type, got: [" - + indexOptions.getType() - + "]" + "[index.vectors.indexing.use_gpu] doesn't support [index_options.type] of [" + indexOptions.getType() + "]." ); } if (GPUSupport.isSupported(true) == false) { @@ -52,7 +50,7 @@ && vectorIndexTypeSupported(indexOptions.getType()) } private boolean vectorIndexTypeSupported(DenseVectorFieldMapper.VectorIndexType type) { - return type == DenseVectorFieldMapper.VectorIndexType.HNSW; + return type == DenseVectorFieldMapper.VectorIndexType.HNSW || type == DenseVectorFieldMapper.VectorIndexType.INT8_HNSW; } private static KnnVectorsFormat getVectorsFormat(DenseVectorFieldMapper.DenseVectorIndexOptions indexOptions) { @@ -63,6 +61,19 @@ private static KnnVectorsFormat getVectorsFormat(DenseVectorFieldMapper.DenseVec efConstruction = ESGpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128 } return new ESGpuHnswVectorsFormat(hnswIndexOptions.m(), efConstruction); + } else if (indexOptions.getType() == DenseVectorFieldMapper.VectorIndexType.INT8_HNSW) { + DenseVectorFieldMapper.Int8HnswIndexOptions int8HnswIndexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) indexOptions; + int efConstruction = int8HnswIndexOptions.efConstruction(); + if (efConstruction == HnswGraphBuilder.DEFAULT_BEAM_WIDTH) { + efConstruction = ESGpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128 + } 
+ return new ESGpuHnswSQVectorsFormat( + int8HnswIndexOptions.m(), + efConstruction, + int8HnswIndexOptions.confidenceInterval(), + 7, + false + ); } else { throw new IllegalArgumentException( "GPU vector indexing is not supported on this vector type: [" + indexOptions.getType() + "]" diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormat.java new file mode 100644 index 0000000000000..400a855db6d6b --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormat.java @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; + +import java.io.IOException; + +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; +import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; +import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.DEFAULT_MAX_CONN; + +/** + * Codec format for GPU-accelerated scalar quantized HNSW vector indexes. + * HNSW graph is built on GPU, while scalar quantization and search is performed on CPU. 
+ */ +public class ESGpuHnswSQVectorsFormat extends KnnVectorsFormat { + public static final String NAME = "ESGPUHnswScalarQuantizedVectorsFormat"; + static final int MAXIMUM_MAX_CONN = 512; + static final int MAXIMUM_BEAM_WIDTH = 3200; + private final int maxConn; + private final int beamWidth; + + /** The format for storing, reading, merging vectors on disk */ + private final FlatVectorsFormat flatVectorsFormat; + final CuVSResourceManager cuVSResourceManager; + + public ESGpuHnswSQVectorsFormat() { + this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, null, 7, false); + } + + public ESGpuHnswSQVectorsFormat(int maxConn, int beamWidth, Float confidenceInterval, int bits, boolean compress) { + super(NAME); + this.cuVSResourceManager = CuVSResourceManager.pooling(); + if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) { + throw new IllegalArgumentException( + "maxConn must be positive and less than or equal to " + MAXIMUM_MAX_CONN + "; maxConn=" + maxConn + ); + } + if (beamWidth <= 0 || beamWidth > MAXIMUM_BEAM_WIDTH) { + throw new IllegalArgumentException( + "beamWidth must be positive and less than or equal to " + MAXIMUM_BEAM_WIDTH + "; beamWidth=" + beamWidth + ); + } + this.maxConn = maxConn; + this.beamWidth = beamWidth; + this.flatVectorsFormat = new ES814ScalarQuantizedVectorsFormat(confidenceInterval, bits, compress); + } + + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return new ESGpuHnswVectorsWriter(cuVSResourceManager, state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state)); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state)); + } + + @Override + public int getMaxDimensions(String fieldName) { + return MAX_DIMS_COUNT; + } + + @Override + public String toString() { + return NAME + + "(name=" + + NAME + + ", maxConn=" + + maxConn + + ", beamWidth=" + + beamWidth + + ", 
flatVectorFormat=" + + flatVectorsFormat + + ")"; + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java index 474e035f2d160..b06b452435c83 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java @@ -81,6 +81,15 @@ public int getMaxDimensions(String fieldName) { @Override public String toString() { - return NAME + "(maxConn=" + maxConn + ", beamWidth=" + beamWidth + ", flatVectorFormat=" + flatVectorsFormat.getName() + ")"; + return NAME + + "(name=" + + NAME + + ", maxConn=" + + maxConn + + ", beamWidth=" + + beamWidth + + ", flatVectorFormat=" + + flatVectorsFormat.getName() + + ")"; } } diff --git a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat index 7489c844fb527..5065e1674b351 100644 --- a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat +++ b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -1,2 +1,3 @@ org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat +org.elasticsearch.xpack.gpu.codec.ESGpuHnswSQVectorsFormat diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java new file mode 100644 index 0000000000000..7c2dce8adcfec --- /dev/null +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java @@ -0,0 +1,76 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +package org.elasticsearch.xpack.gpu.codec; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.xpack.gpu.GPUSupport; +import org.junit.BeforeClass; + +public class ESGpuHnswSQVectorsFormatTests extends BaseKnnVectorsFormatTestCase { + + static { + LogConfigurator.loadLog4jPlugins(); + LogConfigurator.configureESLogging(); // native access requires logging to be initialized + } + + @BeforeClass + public static void beforeClass() { + assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); + } + + static final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ESGpuHnswSQVectorsFormat()); + + @Override + protected Codec getCodec() { + return codec; + } + + @Override + protected VectorSimilarityFunction randomSimilarity() { + return VectorSimilarityFunction.values()[random().nextInt(VectorSimilarityFunction.values().length)]; + } + + @Override + protected VectorEncoding randomVectorEncoding() { + return VectorEncoding.FLOAT32; + } + + @Override + public void testRandomBytes() { + // No bytes support + } + + @Override + public void testSortedIndexBytes() { + // No bytes support + } + + @Override + public void testByteVectorScorerIteration() { + // No bytes support + } + + @Override + public void testEmptyByteVectorData() { + // No bytes support + } + + @Override + public void testMergingWithDifferentByteKnnFields() { + // No bytes support + } + + @Override + public void testMismatchedFields() { + // No bytes support + } +} diff --git 
a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java index 7d1f3f8283bf3..de9f84cd91179 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.gpu.codec; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.FilterCodec; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; @@ -76,15 +74,4 @@ public void testMismatchedFields() throws Exception { // No bytes support } - public void testToString() { - FilterCodec customCodec = new FilterCodec("foo", Codec.getDefault()) { - @Override - public KnnVectorsFormat knnVectorsFormat() { - return new ESGpuHnswVectorsFormat(); - } - }; - String expectedPattern = "ESGpuHnswVectorsFormat(maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)"; - assertEquals(expectedPattern, customCodec.knnVectorsFormat().toString()); - } - } diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java index 6703429318946..da510407f8774 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java @@ -39,7 +39,21 @@ protected Collection getPlugins() { return Collections.singletonList(plugin); } - public void testKnnESGPUHnswVectorsFormat() throws IOException { + public void 
testESGPUHnswVectorsFormat() throws IOException { + KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("hnsw"); + String expectedStr = "ESGpuHnswVectorsFormat(name=ESGpuHnswVectorsFormat, " + + "maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)"; + assertEquals(expectedStr, knnVectorsFormat.toString()); + } + + public void testESGpuHnswScalarQuantizedVectorsFormat() throws IOException { + KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("int8_hnsw"); + String expectedStr = "ESGPUHnswScalarQuantizedVectorsFormat(name=ESGPUHnswScalarQuantizedVectorsFormat, " + + "maxConn=16, beamWidth=128, flatVectorFormat=ES814ScalarQuantizedVectorsFormat"; + assertTrue(knnVectorsFormat.toString().startsWith(expectedStr)); + } + + private KnnVectorsFormat getKnnVectorsFormat(String indexOptionsType) throws IOException { final int dims = randomIntBetween(128, 4096); MapperService mapperService = createMapperService(fieldMapping(b -> { b.field("type", "dense_vector"); @@ -47,23 +61,20 @@ public void testKnnESGPUHnswVectorsFormat() throws IOException { b.field("index", true); b.field("similarity", "dot_product"); b.startObject("index_options"); - b.field("type", "hnsw"); + b.field("type", indexOptionsType); b.endObject(); })); CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); Codec codec = codecService.codec("default"); - KnnVectorsFormat knnVectorsFormat; if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { assertThat(codec, instanceOf(PerFieldMapperCodec.class)); - knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); + return ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } else { if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { codec = deduplicateFieldInfosCodec.delegate(); } assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); - knnVectorsFormat = ((LegacyPerFieldMapperCodec) 
codec).getKnnVectorsFormatForField("field"); + return ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } - String expectedString = "ESGpuHnswVectorsFormat(maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)"; - assertEquals(expectedString, knnVectorsFormat.toString()); } } From b20033f2c8b1a841220d8659fa317ab655ddec4d Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Thu, 21 Aug 2025 13:49:07 -0400 Subject: [PATCH 043/109] Add new gpu/cuvs pipelines --- .../pipelines/cuvs-snapshot/run-tests.yml | 20 +++++++++++++++ .../cuvs-snapshot/update-snapshot.yml | 14 +++++++++++ .buildkite/pipelines/pull-request/gpu.yml | 25 +++++++++++++++++++ .../update-current-snapshot-version.sh | 5 ++-- 4 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 .buildkite/pipelines/cuvs-snapshot/run-tests.yml create mode 100644 .buildkite/pipelines/cuvs-snapshot/update-snapshot.yml create mode 100644 .buildkite/pipelines/pull-request/gpu.yml diff --git a/.buildkite/pipelines/cuvs-snapshot/run-tests.yml b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml new file mode 100644 index 0000000000000..7ce84487f24c7 --- /dev/null +++ b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml @@ -0,0 +1,20 @@ +steps: + - label: "{{matrix.GRADLE_TASK}}" + command: .buildkite/scripts/cuvs-snapshot/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints {{matrix.GRADLE_TASK}} + timeout_in_minutes: 300 + agents: + provider: gcp + image: family/elasticsearch-ubuntu-2404-nvidia + machineType: g2-standard-32 + buildDirectory: /dev/shm/bk + matrix: + setup: + GRADLE_TASK: + - checkPart1 + - checkPart2 + - checkPart3 + - checkPart4 + - checkPart5 + - checkPart6 + env: + GRADLE_TASK: "{{matrix.GRADLE_TASK}}" diff --git a/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml new file mode 100644 index 
0000000000000..c105b1e6d2a0c --- /dev/null +++ b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml @@ -0,0 +1,14 @@ +steps: + - label: "Smoke test and update new cuVS snapshot" + command: .buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh + agents: + provider: gcp + image: family/elasticsearch-ubuntu-2404-nvidia + machineType: g2-standard-16 + zones: us-central1-a + diskSizeGb: 150 + - wait: ~ + - trigger: "elasticsearch-cuvs-run-tests" + build: + branch: "${BUILDKITE_BRANCH}" + async: true diff --git a/.buildkite/pipelines/pull-request/gpu.yml b/.buildkite/pipelines/pull-request/gpu.yml new file mode 100644 index 0000000000000..89399c0c4faa9 --- /dev/null +++ b/.buildkite/pipelines/pull-request/gpu.yml @@ -0,0 +1,25 @@ +config: + allow-labels: test-gpu +steps: + - group: gpu-tests + steps: + - label: "{{matrix.CHECK_TASK}} / gpu-tests" + key: "packaging-tests-unix" + command: .buildkite/scripts/cuvs-snapshot/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints {{matrix.GRADLE_TASK}} + timeout_in_minutes: 300 + matrix: + setup: + CHECK_TASK: + - checkPart1 + - checkPart2 + - checkPart3 + - checkPart4 + - checkPart5 + - checkPart6 + agents: + provider: gcp + image: family/elasticsearch-ubuntu-2404-nvidia + machineType: g2-standard-32 + buildDirectory: /dev/shm/bk + env: + GRADLE_TASK: "{{matrix.GRADLE_TASK}}" diff --git a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh index f651d2e2e44cf..17c83e2f5504c 100755 --- a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh +++ b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh @@ -3,7 +3,6 @@ set -euo pipefail SNAPSHOT_VERSION_FILE=.buildkite/scripts/cuvs-snapshot/current-snapshot-version -PLUGIN_GRADLE_FILE=x-pack/plugin/gpu/build.gradle 
BRANCH_TO_UPDATE="${BRANCH_TO_UPDATE:-${BUILDKITE_BRANCH:-cuvs-snapshot}}" if [[ -z "${CUVS_SNAPSHOT_VERSION:-}" ]]; then @@ -22,7 +21,7 @@ source .buildkite/scripts/cuvs-snapshot/configure.sh if [[ "${SKIP_TESTING:-}" != "true" ]]; then echo "--- Testing snapshot before updating" ./gradlew -Druntime.java=24 :x-pack:plugin:gpu:yamlRestTest -S -fi +fi echo "--- Updating snapshot" @@ -30,7 +29,7 @@ echo "$CUVS_SNAPSHOT_VERSION" > "$SNAPSHOT_VERSION_FILE" CURRENT_SHA="$(gh api "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE?ref=$BRANCH_TO_UPDATE" | jq -r .sha)" || true -echo gh api -X PUT "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE" \ +gh api -X PUT "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE" \ -f branch="$BRANCH_TO_UPDATE" \ -f message="Update cuvs snapshot version to $CUVS_VERSION" \ -f content="$(base64 -w 0 "$WORKSPACE/$SNAPSHOT_VERSION_FILE")" \ From cb076d79c6879e68d595e60077479a2d4116af4e Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Thu, 21 Aug 2025 13:56:20 -0400 Subject: [PATCH 044/109] Set -u later --- .buildkite/scripts/cuvs-snapshot/configure.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh index 6563f15fde1db..383259bbb2ab9 100755 --- a/.buildkite/scripts/cuvs-snapshot/configure.sh +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -1,15 +1,18 @@ #!/bin/bash -set -euo pipefail +set -eo pipefail if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then export JAVA_HOME="$HOME/.java/openjdk24" export PATH="$JAVA_HOME/bin:$PATH" # Setup LD_LIBRARY_PATH, PATH + source /etc/profile.d/elastic-nvidia.sh fi +set -u + # Not running this before the tests results in an error when running the tests # No idea why... 
nvidia-smi From 7abc031d29c3e55ed6ddd3dfd525ff185860306d Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Thu, 21 Aug 2025 14:18:33 -0400 Subject: [PATCH 045/109] Set empty LD_LIBRARY_PATH --- .buildkite/scripts/cuvs-snapshot/configure.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh index 383259bbb2ab9..32e83ee9f7246 100755 --- a/.buildkite/scripts/cuvs-snapshot/configure.sh +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -eo pipefail +set -euo pipefail if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then export JAVA_HOME="$HOME/.java/openjdk24" @@ -8,11 +8,10 @@ if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then # Setup LD_LIBRARY_PATH, PATH + export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}" source /etc/profile.d/elastic-nvidia.sh fi -set -u - # Not running this before the tests results in an error when running the tests # No idea why... 
nvidia-smi From c04f605de32837a61e6b189f96e681550321a02f Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Thu, 21 Aug 2025 18:28:36 +0000 Subject: [PATCH 046/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 3f8919d7a54f3..e4afab683b081 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -c987afa2 +8d36f7a2 From 877ff013821d13fefffd34d3860bc5cc7ca110da Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Thu, 21 Aug 2025 14:37:02 -0400 Subject: [PATCH 047/109] Move env to proper location --- .buildkite/pipelines/cuvs-snapshot/run-tests.yml | 4 ++-- .buildkite/pipelines/pull-request/gpu.yml | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.buildkite/pipelines/cuvs-snapshot/run-tests.yml b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml index 7ce84487f24c7..f7891fe3a9c31 100644 --- a/.buildkite/pipelines/cuvs-snapshot/run-tests.yml +++ b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml @@ -7,6 +7,8 @@ steps: image: family/elasticsearch-ubuntu-2404-nvidia machineType: g2-standard-32 buildDirectory: /dev/shm/bk + env: + GRADLE_TASK: "{{matrix.GRADLE_TASK}}" matrix: setup: GRADLE_TASK: @@ -16,5 +18,3 @@ steps: - checkPart4 - checkPart5 - checkPart6 - env: - GRADLE_TASK: "{{matrix.GRADLE_TASK}}" diff --git a/.buildkite/pipelines/pull-request/gpu.yml b/.buildkite/pipelines/pull-request/gpu.yml index 89399c0c4faa9..45b053b44517a 100644 --- a/.buildkite/pipelines/pull-request/gpu.yml +++ b/.buildkite/pipelines/pull-request/gpu.yml @@ -7,6 +7,13 @@ steps: key: "packaging-tests-unix" command: 
.buildkite/scripts/cuvs-snapshot/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints {{matrix.GRADLE_TASK}} timeout_in_minutes: 300 + agents: + provider: gcp + image: family/elasticsearch-ubuntu-2404-nvidia + machineType: g2-standard-32 + buildDirectory: /dev/shm/bk + env: + GRADLE_TASK: "{{matrix.GRADLE_TASK}}" matrix: setup: CHECK_TASK: @@ -16,10 +23,3 @@ steps: - checkPart4 - checkPart5 - checkPart6 - agents: - provider: gcp - image: family/elasticsearch-ubuntu-2404-nvidia - machineType: g2-standard-32 - buildDirectory: /dev/shm/bk - env: - GRADLE_TASK: "{{matrix.GRADLE_TASK}}" From c281bf518d3e194dec467b5c742eab45968e3a74 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Thu, 21 Aug 2025 14:45:14 -0400 Subject: [PATCH 048/109] Update zones for g2 instances --- .buildkite/pipelines/cuvs-snapshot/run-tests.yml | 1 + .buildkite/pipelines/cuvs-snapshot/update-snapshot.yml | 2 +- .buildkite/pipelines/pull-request/gpu.yml | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipelines/cuvs-snapshot/run-tests.yml b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml index f7891fe3a9c31..fe4e2f8cefd70 100644 --- a/.buildkite/pipelines/cuvs-snapshot/run-tests.yml +++ b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml @@ -7,6 +7,7 @@ steps: image: family/elasticsearch-ubuntu-2404-nvidia machineType: g2-standard-32 buildDirectory: /dev/shm/bk + zones: us-central1-b,us-central1-c env: GRADLE_TASK: "{{matrix.GRADLE_TASK}}" matrix: diff --git a/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml index c105b1e6d2a0c..6011926d782bb 100644 --- a/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml +++ b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml @@ -5,7 +5,7 @@ steps: provider: gcp image: family/elasticsearch-ubuntu-2404-nvidia machineType: g2-standard-16 - zones: us-central1-a + zones: 
us-central1-b,us-central1-c diskSizeGb: 150 - wait: ~ - trigger: "elasticsearch-cuvs-run-tests" diff --git a/.buildkite/pipelines/pull-request/gpu.yml b/.buildkite/pipelines/pull-request/gpu.yml index 45b053b44517a..0286366bbd791 100644 --- a/.buildkite/pipelines/pull-request/gpu.yml +++ b/.buildkite/pipelines/pull-request/gpu.yml @@ -12,6 +12,7 @@ steps: image: family/elasticsearch-ubuntu-2404-nvidia machineType: g2-standard-32 buildDirectory: /dev/shm/bk + zones: us-central1-b,us-central1-c env: GRADLE_TASK: "{{matrix.GRADLE_TASK}}" matrix: From 4ce32d6d5e4baa5059ee6257693a5409cb285d59 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Thu, 21 Aug 2025 14:53:25 -0400 Subject: [PATCH 049/109] Fix env var --- .buildkite/pipelines/pull-request/gpu.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/pipelines/pull-request/gpu.yml b/.buildkite/pipelines/pull-request/gpu.yml index 0286366bbd791..5a4fa4e53ff0d 100644 --- a/.buildkite/pipelines/pull-request/gpu.yml +++ b/.buildkite/pipelines/pull-request/gpu.yml @@ -3,7 +3,7 @@ config: steps: - group: gpu-tests steps: - - label: "{{matrix.CHECK_TASK}} / gpu-tests" + - label: "{{matrix.GRADLE_TASK}} / gpu-tests" key: "packaging-tests-unix" command: .buildkite/scripts/cuvs-snapshot/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints {{matrix.GRADLE_TASK}} timeout_in_minutes: 300 @@ -17,7 +17,7 @@ steps: GRADLE_TASK: "{{matrix.GRADLE_TASK}}" matrix: setup: - CHECK_TASK: + GRADLE_TASK: - checkPart1 - checkPart2 - checkPart3 From f8f7c3991938b063e491d009e02a3c3e0f365173 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Thu, 21 Aug 2025 15:03:41 -0400 Subject: [PATCH 050/109] test-gpu should be a skip-label --- .buildkite/pipelines/pull-request/.defaults.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipelines/pull-request/.defaults.yml 
b/.buildkite/pipelines/pull-request/.defaults.yml index 84d73cbd738a2..f0f87ce6c86b4 100644 --- a/.buildkite/pipelines/pull-request/.defaults.yml +++ b/.buildkite/pipelines/pull-request/.defaults.yml @@ -1,5 +1,7 @@ config: - skip-labels: ">test-mute" + skip-labels: + - ">test-mute" + - test-gpu # TODO remove this once the non-gpu steps work correctly excluded-regions: - ^docs/.* - ^x-pack/docs/.* From 91f676fa44fc29bdf4394db862e21b4bd4a907ab Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Thu, 21 Aug 2025 15:08:08 -0400 Subject: [PATCH 051/109] Adjust skip labels --- .buildkite/pipelines/pull-request/bwc-snapshots.yml | 2 +- .buildkite/pipelines/pull-request/gpu.yml | 2 ++ .../pipelines/pull-request/packaging-tests-unix-sample.yml | 1 + .../pipelines/pull-request/packaging-tests-windows-sample.yml | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipelines/pull-request/bwc-snapshots.yml b/.buildkite/pipelines/pull-request/bwc-snapshots.yml index 739deb2db92c4..e665398431286 100644 --- a/.buildkite/pipelines/pull-request/bwc-snapshots.yml +++ b/.buildkite/pipelines/pull-request/bwc-snapshots.yml @@ -2,6 +2,7 @@ config: trigger-phrase: '.*run\W+elasticsearch-ci/bwc.*' skip-labels: - ">test-mute" + - test-gpu # TODO remove this once the non-gpu steps work correctly - "test-full-bwc" steps: - group: bwc-snapshots @@ -78,4 +79,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diff --git a/.buildkite/pipelines/pull-request/gpu.yml b/.buildkite/pipelines/pull-request/gpu.yml index 5a4fa4e53ff0d..40afa272ead7c 100644 --- a/.buildkite/pipelines/pull-request/gpu.yml +++ b/.buildkite/pipelines/pull-request/gpu.yml @@ -1,5 +1,7 @@ config: allow-labels: test-gpu + skip-labels: + - ">test-mute" steps: - group: gpu-tests steps: diff --git a/.buildkite/pipelines/pull-request/packaging-tests-unix-sample.yml b/.buildkite/pipelines/pull-request/packaging-tests-unix-sample.yml index 
b1b619cc833f3..d11b3644baa96 100644 --- a/.buildkite/pipelines/pull-request/packaging-tests-unix-sample.yml +++ b/.buildkite/pipelines/pull-request/packaging-tests-unix-sample.yml @@ -1,6 +1,7 @@ config: skip-labels: - ">test-mute" + - test-gpu # TODO remove this once the non-gpu steps work correctly - ":Delivery/Packaging" steps: - group: packaging-tests-unix-sample diff --git a/.buildkite/pipelines/pull-request/packaging-tests-windows-sample.yml b/.buildkite/pipelines/pull-request/packaging-tests-windows-sample.yml index 6e8fec65f75a5..0cecd72b9cbb5 100644 --- a/.buildkite/pipelines/pull-request/packaging-tests-windows-sample.yml +++ b/.buildkite/pipelines/pull-request/packaging-tests-windows-sample.yml @@ -1,6 +1,7 @@ config: skip-labels: - ">test-mute" + - test-gpu # TODO remove this once the non-gpu steps work correctly - ":Delivery/Packaging" steps: - group: packaging-tests-windows-sample From 9327371f38f8e2e99b8dfc123b348f827f705fd6 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Thu, 21 Aug 2025 20:13:21 +0000 Subject: [PATCH 052/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index e4afab683b081..6c8febc8a7925 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -8d36f7a2 +699e933a From 1a727fa0d3269fd4d8e61692118f5dfd93da5645 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 22 Aug 2025 10:31:15 -0400 Subject: [PATCH 053/109] Add int8 to KnnIndexTester --- .../test/knn/KnnIndexTester.java | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git 
a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java index c35c8644292e2..eabf0d5c7b693 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java @@ -31,6 +31,7 @@ import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.gpu.codec.ESGpuHnswSQVectorsFormat; import org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat; import java.io.InputStream; @@ -68,15 +69,15 @@ enum IndexType { HNSW, FLAT, IVF, - GPU + GPU_HNSW } private static String formatIndexPath(CmdLineArgs args) { List suffix = new ArrayList<>(); if (args.indexType() == IndexType.FLAT) { suffix.add("flat"); - } else if (args.indexType() == IndexType.GPU) { - suffix.add("gpu"); + } else if (args.indexType() == IndexType.GPU_HNSW) { + suffix.add("gpu_hnsw"); } else if (args.indexType() == IndexType.IVF) { suffix.add("ivf"); suffix.add(Integer.toString(args.ivfClusterSize())); @@ -94,8 +95,16 @@ static Codec createCodec(CmdLineArgs args) { final KnnVectorsFormat format; if (args.indexType() == IndexType.IVF) { format = new IVFVectorsFormat(args.ivfClusterSize()); - } else if (args.indexType() == IndexType.GPU) { - format = new ESGpuHnswVectorsFormat(); + } else if (args.indexType() == IndexType.GPU_HNSW) { + if (args.quantizeBits() == 32) { + format = new ESGpuHnswVectorsFormat(); + } else if (args.quantizeBits() == 7) { + format = new ESGpuHnswSQVectorsFormat(); + } else { + throw new IllegalArgumentException( + "GPU HNSW index type only supports 7 or 32 bits quantization, but got: " + args.quantizeBits() + ); + } } else { if (args.quantizeBits() == 1) { if (args.indexType() == IndexType.FLAT) { From 5d1ef893bc42dd80c1ecfe242c3ac37d5a651d80 Mon Sep 17 00:00:00 2001 From: 
"elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Sat, 23 Aug 2025 04:31:12 +0000 Subject: [PATCH 054/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 6c8febc8a7925..71f83aa9948e6 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -699e933a +50ed7109 From 913430abb7a2e0abe6a3ae1f48da0bbcbfc35dfa Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Tue, 26 Aug 2025 04:33:44 +0000 Subject: [PATCH 055/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 71f83aa9948e6..6278fd41bb389 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -50ed7109 +0b1b1ec4 From f9dd63e347f1ce418b50c3a8adfc0d80bcf82c85 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Wed, 27 Aug 2025 04:41:45 +0000 Subject: [PATCH 056/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 6278fd41bb389..ffcc1010d8e4f 100644 --- 
a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -0b1b1ec4 +73672634 From cff3e948fd4c8f295c1b6980bab26da22c00ebc1 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 28 Aug 2025 15:31:57 -0400 Subject: [PATCH 057/109] Send quantized data to GPU for index building during merge --- .../ES814ScalarQuantizedVectorsFormat.java | 2 +- .../xpack/gpu/codec/DatasetUtils.java | 2 +- .../xpack/gpu/codec/DatasetUtilsImpl.java | 2 +- .../gpu/codec/ESGpuHnswVectorsWriter.java | 47 ++- .../codec/MergedQuantizedVectorValues.java | 359 ++++++++++++++++++ .../xpack/gpu/codec/DatasetUtilsImpl.java | 7 +- .../xpack/gpu/codec/DatasetUtilsTests.java | 8 +- 7 files changed, 415 insertions(+), 12 deletions(-) create mode 100644 x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java index 3dde6fab00d4c..c6416a13cbb92 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java @@ -132,7 +132,7 @@ public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException ); } - static final class ES814ScalarQuantizedVectorsWriter extends FlatVectorsWriter { + public static final class ES814ScalarQuantizedVectorsWriter extends FlatVectorsWriter { final Lucene99ScalarQuantizedVectorsWriter delegate; diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java index bdc598e876931..84fc01ec2974f 100644 --- 
a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java @@ -20,5 +20,5 @@ static DatasetUtils getInstance() { } /** Returns a Dataset over the float32 vectors in the input. */ - CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException; + CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) throws IOException; } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java index 0cca7ce5cd7cb..2be5d582c4cb6 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java @@ -19,7 +19,7 @@ static DatasetUtils getInstance() { } @Override - public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) { + public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) { throw new UnsupportedOperationException("should not reach here"); } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index 3335bd353e1c1..6b738dfeabce9 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -16,6 +16,7 @@ import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; +import org.apache.lucene.index.ByteVectorValues; import 
org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FloatVectorValues; @@ -35,8 +36,10 @@ import org.apache.lucene.util.hnsw.HnswGraph; import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator; import org.apache.lucene.util.packed.DirectMonotonicWriter; +import org.apache.lucene.util.quantization.ScalarQuantizer; import org.elasticsearch.core.IOUtils; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -49,6 +52,7 @@ import java.util.Objects; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; +import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter.mergeAndRecalculateQuantiles; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_META_CODEC_NAME; import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_META_EXTENSION; @@ -75,6 +79,7 @@ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { private final List fields = new ArrayList<>(); private boolean finished; + private final CuVSMatrix.DataType dataType; ESGpuHnswVectorsWriter( CuVSResourceManager cuVSResourceManager, @@ -88,6 +93,11 @@ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { this.M = M; this.beamWidth = beamWidth; this.flatVectorWriter = flatVectorWriter; + if (flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) { + dataType = CuVSMatrix.DataType.BYTE; + } else { + dataType = CuVSMatrix.DataType.FLOAT; + } this.segmentWriteState = state; String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, LUCENE99_HNSW_META_EXTENSION); String indexDataFileName = 
IndexFileNames.segmentFileName( @@ -411,13 +421,17 @@ public NodesIterator getNodesOnLevel(int level) { @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { flatVectorWriter.mergeOneField(fieldInfo, mergeState); - // save merged vector values to a temp file final int numVectors; String tempRawVectorsFileName = null; boolean success = false; + // save merged vector values to a temp file try (IndexOutput out = mergeState.segmentInfo.dir.createTempOutput(mergeState.segmentInfo.name, "vec_", IOContext.DEFAULT)) { tempRawVectorsFileName = out.getName(); - numVectors = writeFloatVectorValues(fieldInfo, out, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState)); + if (dataType == CuVSMatrix.DataType.BYTE) { + numVectors = writeByteVectorValues(out, getMergedByteVectorValues(fieldInfo, mergeState)); + } else { + numVectors = writeFloatVectorValues(fieldInfo, out, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState)); + } CodecUtil.writeFooter(out); success = true; } finally { @@ -429,9 +443,11 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE DatasetOrVectors datasetOrVectors; var input = FilterIndexInput.unwrapOnlyTest(in); if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput && numVectors >= MIN_NUM_VECTORS_FOR_GPU_BUILD) { - var ds = DatasetUtils.getInstance().fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension()); + var ds = DatasetUtils.getInstance() + .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), dataType); datasetOrVectors = DatasetOrVectors.fromDataset(ds); } else { + // TODO fix for byte vectors var fa = copyVectorsIntoArray(in, fieldInfo, numVectors); datasetOrVectors = DatasetOrVectors.fromArray(fa); } @@ -441,6 +457,31 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) 
throws IOE } } + private ByteVectorValues getMergedByteVectorValues(FieldInfo fieldInfo, MergeState mergeState) throws IOException { + // TODO: expose confidence interval from the format + final byte bits = 7; + final Float confidenceInterval = null; + ScalarQuantizer quantizer = mergeAndRecalculateQuantiles(mergeState, fieldInfo, confidenceInterval, bits); + MergedQuantizedVectorValues byteVectorValues = MergedQuantizedVectorValues.mergeQuantizedByteVectorValues( + fieldInfo, + mergeState, + quantizer + ); + return byteVectorValues; + } + + private static int writeByteVectorValues(IndexOutput out, ByteVectorValues vectorValues) throws IOException { + int numVectors = 0; + byte[] vector; + final KnnVectorValues.DocIndexIterator iterator = vectorValues.iterator(); + for (int docV = iterator.nextDoc(); docV != NO_MORE_DOCS; docV = iterator.nextDoc()) { + numVectors++; + vector = vectorValues.vectorValue(iterator.index()); + out.writeBytes(vector, vector.length); + } + return numVectors; + } + static float[][] copyVectorsIntoArray(IndexInput in, FieldInfo fieldInfo, int numVectors) throws IOException { final FloatVectorValues floatVectorValues = getFloatVectorValues(fieldInfo, in, numVectors); float[][] vectors = new float[numVectors][fieldInfo.getVectorDimension()]; diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java new file mode 100644 index 0000000000000..32355723bde5e --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java @@ -0,0 +1,359 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.gpu.codec; + +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.index.DocIDMerger; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.KnnVectorValues; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.VectorScorer; +import org.apache.lucene.util.VectorUtil; +import org.apache.lucene.util.quantization.QuantizedByteVectorValues; +import org.apache.lucene.util.quantization.QuantizedVectorsReader; +import org.apache.lucene.util.quantization.ScalarQuantizer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.apache.lucene.codecs.KnnVectorsWriter.MergedVectorValues.hasVectorValues; + +/** + * A copy from Lucene99ScalarQuantizedVectorsWriter to access mergeQuantizedByteVectorValues + * during segment merge. 
+ */ +class MergedQuantizedVectorValues extends QuantizedByteVectorValues { + private static final float REQUANTIZATION_LIMIT = 0.2f; + + private final List subs; + private final DocIDMerger docIdMerger; + private final int size; + private QuantizedByteVectorValueSub current; + + private MergedQuantizedVectorValues(List subs, MergeState mergeState) throws IOException { + this.subs = subs; + docIdMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); + int totalSize = 0; + for (QuantizedByteVectorValueSub sub : subs) { + totalSize += sub.values.size(); + } + size = totalSize; + } + + @Override + public byte[] vectorValue(int ord) throws IOException { + return current.values.vectorValue(current.index()); + } + + @Override + public DocIndexIterator iterator() { + return new MergedQuantizedVectorValues.CompositeIterator(); + } + + @Override + public int size() { + return size; + } + + @Override + public int dimension() { + return subs.get(0).values.dimension(); + } + + @Override + public float getScoreCorrectionConstant(int ord) throws IOException { + return current.values.getScoreCorrectionConstant(current.index()); + } + + private class CompositeIterator extends DocIndexIterator { + private int docId; + private int ord; + + CompositeIterator() { + docId = -1; + ord = -1; + } + + @Override + public int index() { + return ord; + } + + @Override + public int docID() { + return docId; + } + + @Override + public int nextDoc() throws IOException { + current = docIdMerger.next(); + if (current == null) { + docId = NO_MORE_DOCS; + ord = NO_MORE_DOCS; + } else { + docId = current.mappedDocID; + ++ord; + } + return docId; + } + + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long cost() { + return size; + } + } + + private static QuantizedVectorsReader getQuantizedKnnVectorsReader(KnnVectorsReader vectorsReader, String fieldName) { + if (vectorsReader instanceof 
PerFieldKnnVectorsFormat.FieldsReader candidateReader) { + vectorsReader = candidateReader.getFieldReader(fieldName); + } + if (vectorsReader instanceof QuantizedVectorsReader reader) { + return reader; + } + return null; + } + + static MergedQuantizedVectorValues mergeQuantizedByteVectorValues( + FieldInfo fieldInfo, + MergeState mergeState, + ScalarQuantizer scalarQuantizer + ) throws IOException { + assert fieldInfo != null && fieldInfo.hasVectorValues(); + + List subs = new ArrayList<>(); + for (int i = 0; i < mergeState.knnVectorsReaders.length; i++) { + if (hasVectorValues(mergeState.fieldInfos[i], fieldInfo.name)) { + QuantizedVectorsReader reader = getQuantizedKnnVectorsReader(mergeState.knnVectorsReaders[i], fieldInfo.name); + assert scalarQuantizer != null; + final QuantizedByteVectorValueSub sub; + // Either our quantization parameters are way different than the merged ones + // Or we have never been quantized. + if (reader == null || reader.getQuantizationState(fieldInfo.name) == null + // For smaller `bits` values, we should always recalculate the quantiles + // TODO: this is very conservative, could we reuse information for even int4 + // quantization? 
+ || scalarQuantizer.getBits() <= 4 + || shouldRequantize(reader.getQuantizationState(fieldInfo.name), scalarQuantizer)) { + FloatVectorValues toQuantize = mergeState.knnVectorsReaders[i].getFloatVectorValues(fieldInfo.name); + if (fieldInfo.getVectorSimilarityFunction() == VectorSimilarityFunction.COSINE) { + toQuantize = new NormalizedFloatVectorValues(toQuantize); + } + sub = new QuantizedByteVectorValueSub( + mergeState.docMaps[i], + new QuantizedFloatVectorValues(toQuantize, fieldInfo.getVectorSimilarityFunction(), scalarQuantizer) + ); + } else { + sub = new QuantizedByteVectorValueSub( + mergeState.docMaps[i], + new OffsetCorrectedQuantizedByteVectorValues( + reader.getQuantizedVectorValues(fieldInfo.name), + fieldInfo.getVectorSimilarityFunction(), + scalarQuantizer, + reader.getQuantizationState(fieldInfo.name) + ) + ); + } + subs.add(sub); + } + } + return new MergedQuantizedVectorValues(subs, mergeState); + } + + private static boolean shouldRequantize(ScalarQuantizer existingQuantiles, ScalarQuantizer newQuantiles) { + float tol = REQUANTIZATION_LIMIT * (newQuantiles.getUpperQuantile() - newQuantiles.getLowerQuantile()) / 128f; + if (Math.abs(existingQuantiles.getUpperQuantile() - newQuantiles.getUpperQuantile()) > tol) { + return true; + } + return Math.abs(existingQuantiles.getLowerQuantile() - newQuantiles.getLowerQuantile()) > tol; + } + + private static class QuantizedByteVectorValueSub extends DocIDMerger.Sub { + private final QuantizedByteVectorValues values; + private final KnnVectorValues.DocIndexIterator iterator; + + QuantizedByteVectorValueSub(MergeState.DocMap docMap, QuantizedByteVectorValues values) { + super(docMap); + this.values = values; + iterator = values.iterator(); + assert iterator.docID() == -1; + } + + @Override + public int nextDoc() throws IOException { + return iterator.nextDoc(); + } + + public int index() { + return iterator.index(); + } + } + + private static class QuantizedFloatVectorValues extends 
QuantizedByteVectorValues { + private final FloatVectorValues values; + private final ScalarQuantizer quantizer; + private final byte[] quantizedVector; + private int lastOrd = -1; + private float offsetValue = 0f; + + private final VectorSimilarityFunction vectorSimilarityFunction; + + QuantizedFloatVectorValues(FloatVectorValues values, VectorSimilarityFunction vectorSimilarityFunction, ScalarQuantizer quantizer) { + this.values = values; + this.quantizer = quantizer; + this.quantizedVector = new byte[values.dimension()]; + this.vectorSimilarityFunction = vectorSimilarityFunction; + } + + @Override + public float getScoreCorrectionConstant(int ord) { + if (ord != lastOrd) { + throw new IllegalStateException( + "attempt to retrieve score correction for different ord " + ord + " than the quantization was done for: " + lastOrd + ); + } + return offsetValue; + } + + @Override + public int dimension() { + return values.dimension(); + } + + @Override + public int size() { + return values.size(); + } + + @Override + public byte[] vectorValue(int ord) throws IOException { + if (ord != lastOrd) { + offsetValue = quantize(ord); + lastOrd = ord; + } + return quantizedVector; + } + + @Override + public VectorScorer scorer(float[] target) throws IOException { + throw new UnsupportedOperationException(); + } + + private float quantize(int ord) throws IOException { + return quantizer.quantize(values.vectorValue(ord), quantizedVector, vectorSimilarityFunction); + } + + @Override + public int ordToDoc(int ord) { + return values.ordToDoc(ord); + } + + @Override + public DocIndexIterator iterator() { + return values.iterator(); + } + } + + private static final class NormalizedFloatVectorValues extends FloatVectorValues { + private final FloatVectorValues values; + private final float[] normalizedVector; + + NormalizedFloatVectorValues(FloatVectorValues values) { + this.values = values; + this.normalizedVector = new float[values.dimension()]; + } + + @Override + public int 
dimension() { + return values.dimension(); + } + + @Override + public int size() { + return values.size(); + } + + @Override + public int ordToDoc(int ord) { + return values.ordToDoc(ord); + } + + @Override + public float[] vectorValue(int ord) throws IOException { + System.arraycopy(values.vectorValue(ord), 0, normalizedVector, 0, normalizedVector.length); + VectorUtil.l2normalize(normalizedVector); + return normalizedVector; + } + + @Override + public DocIndexIterator iterator() { + return values.iterator(); + } + + @Override + public NormalizedFloatVectorValues copy() throws IOException { + return new NormalizedFloatVectorValues(values.copy()); + } + } + + private static final class OffsetCorrectedQuantizedByteVectorValues extends QuantizedByteVectorValues { + private final QuantizedByteVectorValues in; + private final VectorSimilarityFunction vectorSimilarityFunction; + private final ScalarQuantizer scalarQuantizer, oldScalarQuantizer; + + OffsetCorrectedQuantizedByteVectorValues( + QuantizedByteVectorValues in, + VectorSimilarityFunction vectorSimilarityFunction, + ScalarQuantizer scalarQuantizer, + ScalarQuantizer oldScalarQuantizer + ) { + this.in = in; + this.vectorSimilarityFunction = vectorSimilarityFunction; + this.scalarQuantizer = scalarQuantizer; + this.oldScalarQuantizer = oldScalarQuantizer; + } + + @Override + public float getScoreCorrectionConstant(int ord) throws IOException { + return scalarQuantizer.recalculateCorrectiveOffset(in.vectorValue(ord), oldScalarQuantizer, vectorSimilarityFunction); + } + + @Override + public int dimension() { + return in.dimension(); + } + + @Override + public int size() { + return in.size(); + } + + @Override + public byte[] vectorValue(int ord) throws IOException { + return in.vectorValue(ord); + } + + @Override + public int ordToDoc(int ord) { + return in.ordToDoc(ord); + } + + @Override + public DocIndexIterator iterator() { + return in.iterator(); + } + } +} diff --git 
a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java index 01bcde9dcc0e0..f70eee02e302d 100644 --- a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java +++ b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java @@ -43,16 +43,17 @@ static CuVSMatrix fromMemorySegment(MemorySegment memorySegment, int size, int d private DatasetUtilsImpl() {} @Override - public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims) throws IOException { + public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) throws IOException { if (numVectors < 0 || dims < 0) { throwIllegalArgumentException(numVectors, dims); } MemorySegment ms = input.segmentSliceOrNull(0L, input.length()); + final int byteSize = dataType == CuVSMatrix.DataType.FLOAT ? Float.BYTES : Byte.BYTES; assert ms != null; // TODO: this can be null if larger than 16GB or ... 
- if (((long) numVectors * dims * Float.BYTES) > ms.byteSize()) { + if (((long) numVectors * dims * byteSize) > ms.byteSize()) { throwIllegalArgumentException(ms, numVectors, dims); } - return fromMemorySegment(ms, numVectors, dims, CuVSMatrix.DataType.FLOAT); + return fromMemorySegment(ms, numVectors, dims, dataType); } static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) { diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java index 0c9c63257c0e8..6c43843dbd830 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java @@ -7,6 +7,8 @@ package org.elasticsearch.xpack.gpu.codec; +import com.nvidia.cuvs.CuVSMatrix; + import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.MMapDirectory; @@ -50,7 +52,7 @@ public void testBasic() throws Exception { } try ( var in = dir.openInput("vector.data", IOContext.DEFAULT); - var dataset = datasetUtils.fromInput((MemorySegmentAccessInput) in, numVecs, dims) + var dataset = datasetUtils.fromInput((MemorySegmentAccessInput) in, numVecs, dims, CuVSMatrix.DataType.FLOAT) ) { assertEquals(numVecs, dataset.size()); assertEquals(dims, dataset.columns()); @@ -62,8 +64,8 @@ public void testBasic() throws Exception { public void testIllegal() { MemorySegmentAccessInput in = null; // TODO: make this non-null - expectThrows(IAE, () -> datasetUtils.fromInput(in, -1, 1)); - expectThrows(IAE, () -> datasetUtils.fromInput(in, 1, -1)); + expectThrows(IAE, () -> datasetUtils.fromInput(in, -1, 1, CuVSMatrix.DataType.FLOAT)); + expectThrows(IAE, () -> datasetUtils.fromInput(in, 1, -1, CuVSMatrix.DataType.FLOAT)); } float[] randomVector(int dims) { From 
ebb36e0eb2abf455068e372d16912f443a746b70 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 2 Sep 2025 16:22:16 -0400 Subject: [PATCH 058/109] Fix for int8_hnsw when the number of vectors < MIN_NUM_VECTORS_FOR_GPU_BUILD after merge --- .../gpu/codec/ESGpuHnswVectorsWriter.java | 78 ++++--------------- 1 file changed, 14 insertions(+), 64 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index 6b738dfeabce9..b8215e4fbc702 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -242,18 +242,14 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV long vectorIndexOffset = vectorIndex.getFilePointer(); int[][] graphLevelNodeOffsets = new int[1][]; HnswGraph mockGraph; - if (datasetOrVectors.vectors != null) { - float[][] vectors = datasetOrVectors.vectors; + if (datasetOrVectors.getVectors() != null) { + int size = datasetOrVectors.size(); if (logger.isDebugEnabled()) { - logger.debug( - "Skip building carga index; vectors length {} < {} (min for GPU)", - vectors.length, - MIN_NUM_VECTORS_FOR_GPU_BUILD - ); + logger.debug("Skip building carga index; vectors length {} < {} (min for GPU)", size, MIN_NUM_VECTORS_FOR_GPU_BUILD); } - mockGraph = writeGraph(vectors, graphLevelNodeOffsets); + mockGraph = writeGraph(size, graphLevelNodeOffsets); } else { - var dataset = datasetOrVectors.dataset; + var dataset = datasetOrVectors.getDataset(); var cuVSResources = cuVSResourceManager.acquire((int) dataset.size(), (int) dataset.columns()); try { try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) { @@ -340,13 +336,12 @@ private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] 
levelNodeOffsets) th return createMockGraph(maxElementCount, maxGraphDegree); } - // create a graph where every node is connected to every other node - private HnswGraph writeGraph(float[][] vectors, int[][] levelNodeOffsets) throws IOException { - if (vectors.length == 0) { + // create a mock graph where every node is connected to every other node + private HnswGraph writeGraph(int elementCount, int[][] levelNodeOffsets) throws IOException { + if (elementCount == 0) { return null; } - int elementCount = vectors.length; - int nodeDegree = vectors.length - 1; + int nodeDegree = elementCount - 1; levelNodeOffsets[0] = new int[elementCount]; int[] neighbors = new int[nodeDegree]; @@ -447,9 +442,11 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), dataType); datasetOrVectors = DatasetOrVectors.fromDataset(ds); } else { - // TODO fix for byte vectors - var fa = copyVectorsIntoArray(in, fieldInfo, numVectors); - datasetOrVectors = DatasetOrVectors.fromArray(fa); + assert numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD : "numVectors: " + numVectors; + // we don't really need real value for vectors here, + // we just build a mock graph where every node is connected to every other node + float[][] vectors = new float[numVectors][fieldInfo.getVectorDimension()]; + datasetOrVectors = DatasetOrVectors.fromArray(vectors); } writeFieldInternal(fieldInfo, datasetOrVectors); } finally { @@ -482,17 +479,6 @@ private static int writeByteVectorValues(IndexOutput out, ByteVectorValues vecto return numVectors; } - static float[][] copyVectorsIntoArray(IndexInput in, FieldInfo fieldInfo, int numVectors) throws IOException { - final FloatVectorValues floatVectorValues = getFloatVectorValues(fieldInfo, in, numVectors); - float[][] vectors = new float[numVectors][fieldInfo.getVectorDimension()]; - float[] vector; - for (int i = 0; i < numVectors; i++) { - vector = 
floatVectorValues.vectorValue(i); - System.arraycopy(vector, 0, vectors[i], 0, vector.length); - } - return vectors; - } - private static int writeFloatVectorValues(FieldInfo fieldInfo, IndexOutput out, FloatVectorValues floatVectorValues) throws IOException { int numVectors = 0; @@ -507,42 +493,6 @@ private static int writeFloatVectorValues(FieldInfo fieldInfo, IndexOutput out, return numVectors; } - private static FloatVectorValues getFloatVectorValues(FieldInfo fieldInfo, IndexInput randomAccessInput, int numVectors) { - if (numVectors == 0) { - return FloatVectorValues.fromFloats(List.of(), fieldInfo.getVectorDimension()); - } - final long length = (long) Float.BYTES * fieldInfo.getVectorDimension(); - final float[] vector = new float[fieldInfo.getVectorDimension()]; - return new FloatVectorValues() { - @Override - public float[] vectorValue(int ord) throws IOException { - randomAccessInput.seek(ord * length); - randomAccessInput.readFloats(vector, 0, vector.length); - return vector; - } - - @Override - public FloatVectorValues copy() { - return this; - } - - @Override - public int dimension() { - return fieldInfo.getVectorDimension(); - } - - @Override - public int size() { - return numVectors; - } - - @Override - public int ordToDoc(int ord) { - throw new UnsupportedOperationException("Not implemented"); - } - }; - } - private void writeMeta( FieldInfo field, long vectorIndexOffset, From 83aa729e5f1da881c7f16bfcb8d16859ad6c6ba6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?= Date: Wed, 3 Sep 2025 11:28:18 +0200 Subject: [PATCH 059/109] PoolingCuVSResourceManager with memory availability (#133242) --- .../xpack/gpu/codec/CuVSResourceManager.java | 160 +++++++++++++++--- .../gpu/codec/ESGpuHnswVectorsWriter.java | 2 +- .../gpu/codec/CuVSResourceManagerTests.java | 140 +++++++++++++-- 3 files changed, 265 insertions(+), 37 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java 
b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java index e7977f28c9c22..26e4e94ed57ea 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java @@ -7,14 +7,20 @@ package org.elasticsearch.xpack.gpu.codec; +import com.nvidia.cuvs.CuVSMatrix; import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.GPUInfoProvider; +import com.nvidia.cuvs.spi.CuVSProvider; +import org.elasticsearch.core.Strings; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import org.elasticsearch.xpack.gpu.GPUSupport; import java.nio.file.Path; import java.util.Objects; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.BlockingQueue; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; /** * A manager of {@link com.nvidia.cuvs.CuVSResources}. There is one manager per GPU. @@ -44,7 +50,7 @@ public interface CuVSResourceManager { // numVectors and dims are currently unused, but could be used along with GPU metadata, // memory, generation, etc, when acquiring for 10M x 1536 dims, or 100,000 x 128 dims, // to give out a resources or not. - ManagedCuVSResources acquire(int numVectors, int dims) throws InterruptedException; + ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataType) throws InterruptedException; /** Marks the resources as finished with regard to compute. 
*/ void finishedComputation(ManagedCuVSResources resources); @@ -65,35 +71,127 @@ static CuVSResourceManager pooling() { */ class PoolingCuVSResourceManager implements CuVSResourceManager { + static final Logger logger = LogManager.getLogger(CuVSResourceManager.class); + + /** A multiplier on input data to account for intermediate and output data size required while processing it */ + static final double GPU_COMPUTATION_MEMORY_FACTOR = 2.0; static final int MAX_RESOURCES = 2; - static final PoolingCuVSResourceManager INSTANCE = new PoolingCuVSResourceManager(MAX_RESOURCES); + static final PoolingCuVSResourceManager INSTANCE = new PoolingCuVSResourceManager( + MAX_RESOURCES, + CuVSProvider.provider().gpuInfoProvider() + ); + + private final ManagedCuVSResources[] pool; + private final int capacity; + private final GPUInfoProvider gpuInfoProvider; + private int createdCount; - final BlockingQueue pool; - final int capacity; - int createdCount; + ReentrantLock lock = new ReentrantLock(); + Condition enoughResourcesCondition = lock.newCondition(); - public PoolingCuVSResourceManager(int capacity) { + public PoolingCuVSResourceManager(int capacity, GPUInfoProvider gpuInfoProvider) { if (capacity < 1 || capacity > MAX_RESOURCES) { throw new IllegalArgumentException("Resource count must be between 1 and " + MAX_RESOURCES); } this.capacity = capacity; - this.pool = new ArrayBlockingQueue<>(capacity); + this.gpuInfoProvider = gpuInfoProvider; + this.pool = new ManagedCuVSResources[MAX_RESOURCES]; } - @Override - public ManagedCuVSResources acquire(int numVectors, int dims) throws InterruptedException { - ManagedCuVSResources res = pool.poll(); - if (res != null) { + private ManagedCuVSResources getResourceFromPool() { + for (int i = 0; i < createdCount; ++i) { + var res = pool[i]; + if (res.locked == false) { + return res; + } + } + if (createdCount < capacity) { + var res = new ManagedCuVSResources(Objects.requireNonNull(createNew())); + pool[createdCount++] = res; return 
res; } - synchronized (this) { - if (createdCount < capacity) { - createdCount++; - return new ManagedCuVSResources(Objects.requireNonNull(createNew())); + return null; + } + + private int numLockedResources() { + int lockedResources = 0; + for (int i = 0; i < createdCount; ++i) { + var res = pool[i]; + if (res.locked) { + lockedResources++; } } - // Otherwise, wait for one to be released - return pool.take(); + return lockedResources; + } + + @Override + public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataType) throws InterruptedException { + lock.lock(); + try { + + boolean allConditionsMet = false; + ManagedCuVSResources res = null; + while (allConditionsMet == false) { + res = getResourceFromPool(); + + final boolean enoughMemory; + if (res != null) { + long requiredMemoryInBytes = estimateRequiredMemory(numVectors, dims, dataType); + logger.info( + "Estimated memory for [{}] vectors, [{}] dims of type [{}] is [{} B]", + numVectors, + dims, + dataType.name(), + requiredMemoryInBytes + ); + + // Check immutable constraints + long totalDeviceMemoryInBytes = gpuInfoProvider.getCurrentInfo(res).totalDeviceMemoryInBytes(); + if (requiredMemoryInBytes > totalDeviceMemoryInBytes) { + String message = Strings.format( + "Requested GPU memory for [%d] vectors, [%d] dims is greater than the GPU total memory [%d B]", + numVectors, + dims, + totalDeviceMemoryInBytes + ); + logger.error(message); + throw new IllegalArgumentException(message); + } + + // If no resource in the pool is locked, short circuit to avoid livelock + if (numLockedResources() == 0) { + logger.info("No resources currently locked, proceeding"); + break; + } + + // Check resources availability + long freeDeviceMemoryInBytes = gpuInfoProvider.getCurrentInfo(res).freeDeviceMemoryInBytes(); + enoughMemory = requiredMemoryInBytes <= freeDeviceMemoryInBytes; + logger.info("Free device memory [{} B], enoughMemory[{}]", freeDeviceMemoryInBytes, enoughMemory); + } else { + logger.info("No
resources available in pool"); + enoughMemory = false; + } + // TODO: add enoughComputation / enoughComputationCondition here + allConditionsMet = enoughMemory; // && enoughComputation + if (allConditionsMet == false) { + enoughResourcesCondition.await(); + } + } + res.locked = true; + return res; + } finally { + lock.unlock(); + } + } + + private long estimateRequiredMemory(int numVectors, int dims, CuVSMatrix.DataType dataType) { + int elementTypeBytes = switch (dataType) { + case FLOAT -> Float.BYTES; + case INT, UINT -> Integer.BYTES; + case BYTE -> Byte.BYTES; + }; + return (long) (GPU_COMPUTATION_MEMORY_FACTOR * numVectors * dims * elementTypeBytes); } // visible for testing @@ -103,21 +201,31 @@ protected CuVSResources createNew() { @Override public void finishedComputation(ManagedCuVSResources resources) { + logger.info("Computation finished"); // currently does nothing, but could allow acquire to return possibly blocked resources + // enoughResourcesCondition.signalAll() } @Override public void release(ManagedCuVSResources resources) { - var added = pool.offer(Objects.requireNonNull(resources)); - assert added : "Failed to release resource back to pool"; + logger.info("Releasing resources to pool"); + lock.lock(); + try { + assert resources.locked; + resources.locked = false; + enoughResourcesCondition.signalAll(); + } finally { + lock.unlock(); + } } @Override public void shutdown() { - for (ManagedCuVSResources res : pool) { + for (int i = 0; i < createdCount; ++i) { + var res = pool[i]; + assert res != null; res.delegate.close(); } - pool.clear(); } } @@ -125,6 +233,7 @@ public void shutdown() { final class ManagedCuVSResources implements CuVSResources { final CuVSResources delegate; + boolean locked = false; ManagedCuVSResources(CuVSResources resources) { this.delegate = resources; @@ -135,6 +244,11 @@ public ScopedAccess access() { return delegate.access(); } + @Override + public int deviceId() { + return delegate.deviceId(); + } + @Override public
void close() { throw new UnsupportedOperationException("this resource is managed, cannot be closed by clients"); diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index b8215e4fbc702..5a166fd2eeac0 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -250,7 +250,7 @@ private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrV mockGraph = writeGraph(size, graphLevelNodeOffsets); } else { var dataset = datasetOrVectors.getDataset(); - var cuVSResources = cuVSResourceManager.acquire((int) dataset.size(), (int) dataset.columns()); + var cuVSResources = cuVSResourceManager.acquire((int) dataset.size(), (int) dataset.columns(), dataset.dataType()); try { try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) { assert index != null : "GPU index should be built for field: " + fieldInfo.name; diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java index a5bac96cc3b51..b466f37cbe9c9 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java @@ -7,29 +7,44 @@ package org.elasticsearch.xpack.gpu.codec; +import com.nvidia.cuvs.CuVSMatrix; import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.CuVSResourcesInfo; +import com.nvidia.cuvs.GPUInfo; +import com.nvidia.cuvs.GPUInfoProvider; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import 
org.elasticsearch.test.ESTestCase; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.LongSupplier; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.not; public class CuVSResourceManagerTests extends ESTestCase { + private static final Logger log = LogManager.getLogger(CuVSResourceManagerTests.class); + + public static final long TOTAL_DEVICE_MEMORY_IN_BYTES = 256L * 1024 * 1024; + public void testBasic() throws InterruptedException { var mgr = new MockPoolingCuVSResourceManager(2); - var res1 = mgr.acquire(0, 0); - var res2 = mgr.acquire(0, 0); + var res1 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + var res2 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); assertThat(res1.toString(), containsString("id=0")); assertThat(res2.toString(), containsString("id=1")); mgr.release(res1); mgr.release(res2); - res1 = mgr.acquire(0, 0); - res2 = mgr.acquire(0, 0); + res1 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + res2 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); assertThat(res1.toString(), containsString("id=0")); assertThat(res2.toString(), containsString("id=1")); mgr.release(res1); @@ -39,13 +54,13 @@ public void testBasic() throws InterruptedException { public void testBlocking() throws Exception { var mgr = new MockPoolingCuVSResourceManager(2); - var res1 = mgr.acquire(0, 0); - var res2 = mgr.acquire(0, 0); + var res1 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + var res2 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); AtomicReference holder = new AtomicReference<>(); Thread t = new Thread(() -> { try { - var res3 = mgr.acquire(0, 0); + var res3 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); holder.set(res3); } catch (InterruptedException e) { throw new 
AssertionError(e); @@ -60,18 +75,60 @@ public void testBlocking() throws Exception { mgr.shutdown(); } + public void testBlockingOnInsufficientMemory() throws Exception { + var mgr = new MockPoolingCuVSResourceManager(2); + var res1 = mgr.acquire(16 * 1024, 1024, CuVSMatrix.DataType.FLOAT); + + AtomicReference holder = new AtomicReference<>(); + Thread t = new Thread(() -> { + try { + var res2 = mgr.acquire((16 * 1024) + 1, 1024, CuVSMatrix.DataType.FLOAT); + holder.set(res2); + } catch (InterruptedException e) { + throw new AssertionError(e); + } + }); + t.start(); + Thread.sleep(1_000); + assertNull(holder.get()); + mgr.release(res1); + t.join(); + assertThat(holder.get().toString(), anyOf(containsString("id=0"), containsString("id=1"))); + mgr.shutdown(); + } + + public void testNotBlockingOnSufficientMemory() throws Exception { + var mgr = new MockPoolingCuVSResourceManager(2); + var res1 = mgr.acquire(16 * 1024, 1024, CuVSMatrix.DataType.FLOAT); + + AtomicReference holder = new AtomicReference<>(); + Thread t = new Thread(() -> { + try { + var res2 = mgr.acquire((16 * 1024) - 1, 1024, CuVSMatrix.DataType.FLOAT); + holder.set(res2); + } catch (InterruptedException e) { + throw new AssertionError(e); + } + }); + t.start(); + t.join(5_000); + assertNotNull(holder.get()); + assertThat(holder.get().toString(), not(equalTo(res1.toString()))); + mgr.shutdown(); + } + public void testManagedResIsNotClosable() throws Exception { var mgr = new MockPoolingCuVSResourceManager(1); - var res = mgr.acquire(0, 0); - assertThrows(UnsupportedOperationException.class, () -> res.close()); + var res = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + assertThrows(UnsupportedOperationException.class, res::close); mgr.release(res); mgr.shutdown(); } public void testDoubleRelease() throws InterruptedException { var mgr = new MockPoolingCuVSResourceManager(2); - var res1 = mgr.acquire(0, 0); - var res2 = mgr.acquire(0, 0); + var res1 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + var 
res2 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); mgr.release(res1); mgr.release(res2); assertThrows(AssertionError.class, () -> mgr.release(randomFrom(res1, res2))); @@ -80,16 +137,45 @@ public void testDoubleRelease() throws InterruptedException { static class MockPoolingCuVSResourceManager extends CuVSResourceManager.PoolingCuVSResourceManager { - final AtomicInteger idGenerator = new AtomicInteger(); + private final AtomicInteger idGenerator = new AtomicInteger(); + private final List allocations; MockPoolingCuVSResourceManager(int capacity) { - super(capacity); + this(capacity, new ArrayList<>()); + } + + private MockPoolingCuVSResourceManager(int capacity, List allocationList) { + super(capacity, new MockGPUInfoProvider(() -> freeMemoryFunction(allocationList))); + this.allocations = allocationList; + } + + private static long freeMemoryFunction(List allocations) { + return TOTAL_DEVICE_MEMORY_IN_BYTES - allocations.stream().mapToLong(x -> x).sum(); } @Override protected CuVSResources createNew() { return new MockCuVSResources(idGenerator.getAndIncrement()); } + + @Override + public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataType) throws InterruptedException { + var res = super.acquire(numVectors, dims, dataType); + long memory = (long) (numVectors * dims * Float.BYTES + * CuVSResourceManager.PoolingCuVSResourceManager.GPU_COMPUTATION_MEMORY_FACTOR); + allocations.add(memory); + log.info("Added [{}]", memory); + return res; + } + + @Override + public void release(ManagedCuVSResources resources) { + if (allocations.isEmpty() == false) { + var x = allocations.removeLast(); + log.info("Removed [{}]", x); + } + super.release(resources); + } } static class MockCuVSResources implements CuVSResources { @@ -105,6 +191,11 @@ public ScopedAccess access() { throw new UnsupportedOperationException(); } + @Override + public int deviceId() { + return 0; + } + @Override public void close() {} @@ -118,4 +209,27 @@ public String 
toString() { return "MockCuVSResources[id=" + id + "]"; } } + + private static class MockGPUInfoProvider implements GPUInfoProvider { + private final LongSupplier freeMemorySupplier; + + MockGPUInfoProvider(LongSupplier freeMemorySupplier) { + this.freeMemorySupplier = freeMemorySupplier; + } + + @Override + public List availableGPUs() { + throw new UnsupportedOperationException(); + } + + @Override + public List compatibleGPUs() { + throw new UnsupportedOperationException(); + } + + @Override + public CuVSResourcesInfo getCurrentInfo(CuVSResources cuVSResources) { + return new CuVSResourcesInfo(freeMemorySupplier.getAsLong(), TOTAL_DEVICE_MEMORY_IN_BYTES); + } + } } From 3cd440517921bdfad7913da7e57ce8139fab47e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?= Date: Wed, 3 Sep 2025 12:19:00 +0200 Subject: [PATCH 060/109] Remove assertion to fix tests (#134035) --- .../elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java | 2 +- .../xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index 5a166fd2eeac0..df9b47ee5c62d 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -442,7 +442,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), dataType); datasetOrVectors = DatasetOrVectors.fromDataset(ds); } else { - assert numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD : "numVectors: " + numVectors; + // assert numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD : "numVectors: " + numVectors; // we don't really need real value for vectors here, // we 
just build a mock graph where every node is connected to every other node float[][] vectors = new float[numVectors][fieldInfo.getVectorDimension()]; diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java index de9f84cd91179..77e758cda78c5 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java @@ -10,11 +10,14 @@ import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.xpack.gpu.GPUSupport; import org.junit.BeforeClass; +// CuVS prints tons of logs to stdout +@LuceneTestCase.SuppressSysoutChecks(bugUrl = "https://github.com/rapidsai/cuvs/issues/1310") public class ESGpuHnswVectorsFormatTests extends BaseKnnVectorsFormatTestCase { static { From ad34a1c311c97295639988f967bc1c03a342134a Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 3 Sep 2025 13:18:55 -0400 Subject: [PATCH 061/109] Fix spotless --- .../index/mapper/vectors/DenseVectorFieldMapper.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 6be7c6d5a55bb..7450b510c855a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -259,7 +259,12 @@ public 
static class Builder extends FieldMapper.Builder { final boolean isExcludeSourceVectors; private final List vectorsFormatProviders; - public Builder(String name, IndexVersion indexVersionCreated, boolean isExcludeSourceVectors, List vectorsFormatProviders) { + public Builder( + String name, + IndexVersion indexVersionCreated, + boolean isExcludeSourceVectors, + List vectorsFormatProviders + ) { super(name); this.indexVersionCreated = indexVersionCreated; this.vectorsFormatProviders = vectorsFormatProviders; From d8e6826ca305e9596cc05d6744a12a8a518111e8 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 3 Sep 2025 15:46:02 -0400 Subject: [PATCH 062/109] Modify GPUDenseVectorFieldMapperTests to extend DenseVectorFieldMapperTests --- ...bstractDenseVectorFieldMapperTestcase.java | 1430 ----------------- .../codec/GPUDenseVectorFieldMapperTests.java | 12 +- 2 files changed, 8 insertions(+), 1434 deletions(-) delete mode 100644 test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java deleted file mode 100644 index 4a78b61c37d31..0000000000000 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractDenseVectorFieldMapperTestcase.java +++ /dev/null @@ -1,1430 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.index.mapper; - -import org.apache.lucene.search.FieldExistsQuery; -import org.apache.lucene.search.Query; -import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.IndexVersions; -import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; -import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DenseVectorFieldType; -import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.ElementType; -import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.VectorSimilarity; -import org.elasticsearch.index.query.SearchExecutionContext; -import org.elasticsearch.search.lookup.Source; -import org.elasticsearch.search.lookup.SourceProvider; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xcontent.XContentBuilder; -import org.junit.AssumptionViolatedException; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.instanceOf; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public abstract class AbstractDenseVectorFieldMapperTestcase extends MapperTestCase { - - protected static final IndexVersion INDEXED_BY_DEFAULT_PREVIOUS_INDEX_VERSION = IndexVersions.V_8_10_0; - protected final ElementType elementType; - protected final boolean indexed; - protected final boolean indexOptionsSet; - protected final int dims; - - protected AbstractDenseVectorFieldMapperTestcase() { - this.elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT); - this.indexed = randomBoolean(); - this.indexOptionsSet = this.indexed && randomBoolean(); - this.dims = ElementType.BIT == elementType ? 
4 * Byte.SIZE : 4; - } - - @Override - protected void minimalMapping(XContentBuilder b) throws IOException { - indexMapping(b, IndexVersion.current()); - } - - @Override - protected void minimalMapping(XContentBuilder b, IndexVersion indexVersion) throws IOException { - indexMapping(b, indexVersion); - } - - protected void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws IOException { - b.field("type", "dense_vector").field("dims", dims); - if (elementType != ElementType.FLOAT) { - b.field("element_type", elementType.toString()); - } - if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) { - // Serialize if it's new index version, or it was not the default for previous indices - b.field("index", indexed); - } - if (indexVersion.onOrAfter(DenseVectorFieldMapper.DEFAULT_TO_INT8) - && indexed - && elementType.equals(ElementType.FLOAT) - && indexOptionsSet == false) { - b.startObject("index_options"); - b.field("type", "int8_hnsw"); - b.field("m", 16); - b.field("ef_construction", 100); - b.endObject(); - } - if (indexed) { - b.field("similarity", elementType == ElementType.BIT ? "l2_norm" : "dot_product"); - if (indexOptionsSet) { - b.startObject("index_options"); - b.field("type", "hnsw"); - b.field("m", 5); - b.field("ef_construction", 50); - b.endObject(); - } - } - } - - @Override - protected Object getSampleValueForDocument() { - return elementType == ElementType.FLOAT ? 
List.of(0.5, 0.5, 0.5, 0.5) : List.of((byte) 1, (byte) 1, (byte) 1, (byte) 1); - } - - @Override - protected void registerParameters(ParameterChecker checker) throws IOException { - checker.registerConflictCheck( - "dims", - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims)), - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims + 8)) - ); - checker.registerConflictCheck( - "similarity", - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", true).field("similarity", "dot_product")), - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", true).field("similarity", "l2_norm")) - ); - checker.registerConflictCheck( - "index", - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", true).field("similarity", "dot_product")), - fieldMapping(b -> b.field("type", "dense_vector").field("dims", dims).field("index", false)) - ); - checker.registerConflictCheck( - "element_type", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .field("similarity", "dot_product") - .field("element_type", "byte") - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .field("similarity", "dot_product") - .field("element_type", "float") - ) - ); - checker.registerConflictCheck( - "element_type", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .field("similarity", "l2_norm") - .field("element_type", "float") - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 8) - .field("index", true) - .field("similarity", "l2_norm") - .field("element_type", "bit") - ) - ); - checker.registerConflictCheck( - "element_type", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .field("similarity", "l2_norm") - .field("element_type", 
"byte") - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 8) - .field("index", true) - .field("similarity", "l2_norm") - .field("element_type", "bit") - ) - ); - // update for flat - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int8_flat\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_flat\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", 
dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_flat\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for int8_flat - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - 
.endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_flat\"")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_flat\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - 
.endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for hnsw - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .field("m", 100) - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - 
checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .field("m", 16) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) 
- .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for int8_hnsw - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .field("m", 256) - .endObject(), - m -> assertTrue(m.toString().contains("\"m\":256")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("m", 256) - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .field("m", 16) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - 
fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for int4_flat - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - 
.startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int4_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"int8_hnsw\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"hnsw\"")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", 
true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_flat\"")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for int4_hnsw - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("m", 256) - .field("type", "int4_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"m\":256")) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("confidence_interval", 0.03) - .field("m", 4) - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("confidence_interval", 0.03) - .field("m", 100) - .endObject(), - m -> assertTrue(m.toString().contains("\"m\":100")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("m", 32) - 
.endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("m", 16) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("confidence_interval", 0.3) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .field("m", 16) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .field("m", 32) - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .field("m", 16) - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - 
.endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ) - ); - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject(), - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - // update for bbq_flat - checker.registerUpdateCheck( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject(), - b -> b.field("type", 
"dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject(), - m -> assertTrue(m.toString().contains("\"type\":\"bbq_hnsw\"")) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - 
fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_flat") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ) - ); - // update for bbq_hnsw - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - 
.field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_flat") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "hnsw") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int8_hnsw") - .endObject() - ) - ); - checker.registerConflictCheck( - "index_options", - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "bbq_hnsw") - .endObject() - ), - fieldMapping( - b -> b.field("type", "dense_vector") - .field("dims", dims * 16) - .field("index", true) - .startObject("index_options") - .field("type", "int4_hnsw") - .endObject() - ) - ); - } - - @Override - protected boolean supportsStoredFields() { - return false; - } - - @Override - protected boolean supportsIgnoreMalformed() { - return false; - } - - @Override - protected void assertSearchable(MappedFieldType fieldType) { - assertThat(fieldType, instanceOf(DenseVectorFieldType.class)); - assertEquals(fieldType.isIndexed(), indexed); - assertEquals(fieldType.isSearchable(), indexed); - } - - protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) { - assertThat(query, instanceOf(FieldExistsQuery.class)); 
- FieldExistsQuery existsQuery = (FieldExistsQuery) query; - assertEquals("field", existsQuery.getField()); - assertNoFieldNamesField(fields); - } - - // We override this because dense vectors are the only field type that are not aggregatable but - // that do provide fielddata. TODO: resolve this inconsistency! - @Override - public void testAggregatableConsistency() {} - - @Override - protected void assertFetchMany(MapperService mapperService, String field, Object value, String format, int count) throws IOException { - assumeFalse("Dense vectors currently don't support multiple values in the same field", false); - } - - /** - * Dense vectors don't support doc values or string representation (for doc value parser/fetching). - * We may eventually support that, but until then, we only verify that the parsing and fields fetching matches the provided value object - */ - @Override - protected void assertFetch(MapperService mapperService, String field, Object value, String format) throws IOException { - MappedFieldType ft = mapperService.fieldType(field); - MappedFieldType.FielddataOperation fdt = MappedFieldType.FielddataOperation.SEARCH; - SourceToParse source = source(b -> b.field(ft.name(), value)); - SearchExecutionContext searchExecutionContext = mock(SearchExecutionContext.class); - when(searchExecutionContext.isSourceEnabled()).thenReturn(true); - when(searchExecutionContext.sourcePath(field)).thenReturn(Set.of(field)); - when(searchExecutionContext.getForField(ft, fdt)).thenAnswer(inv -> fieldDataLookup(mapperService).apply(ft, () -> { - throw new UnsupportedOperationException(); - }, fdt)); - ValueFetcher nativeFetcher = ft.valueFetcher(searchExecutionContext, format); - ParsedDocument doc = mapperService.documentMapper().parse(source); - withLuceneIndex(mapperService, iw -> iw.addDocuments(doc.docs()), ir -> { - Source s = SourceProvider.fromLookup(mapperService.mappingLookup(), null, mapperService.getMapperMetrics().sourceFieldMetrics()) - 
.getSource(ir.leaves().get(0), 0); - nativeFetcher.setNextReader(ir.leaves().get(0)); - List fromNative = nativeFetcher.fetchValues(s, 0, new ArrayList<>()); - DenseVectorFieldType denseVectorFieldType = (DenseVectorFieldType) ft; - switch (denseVectorFieldType.getElementType()) { - case BYTE -> { - assumeFalse("byte element type testing not currently added", false); - } - case FLOAT -> { - float[] fetchedFloats = new float[denseVectorFieldType.getVectorDimensions()]; - int i = 0; - for (var f : fromNative) { - assert f instanceof Number; - fetchedFloats[i++] = ((Number) f).floatValue(); - } - assertThat("fetching " + value, fetchedFloats, equalTo(value)); - } - } - }); - } - - @Override - // TODO: add `byte` element_type tests - protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException { - b.field("type", "dense_vector").field("dims", randomIntBetween(2, 4096)).field("element_type", "float"); - if (randomBoolean()) { - b.field("index", true).field("similarity", randomFrom(VectorSimilarity.values()).toString()); - } - } - - @Override - protected Object generateRandomInputValue(MappedFieldType ft) { - DenseVectorFieldType vectorFieldType = (DenseVectorFieldType) ft; - return switch (vectorFieldType.getElementType()) { - case BYTE -> randomByteArrayOfLength(vectorFieldType.getVectorDimensions()); - case FLOAT -> { - float[] floats = new float[vectorFieldType.getVectorDimensions()]; - float magnitude = 0; - for (int i = 0; i < floats.length; i++) { - float f = randomFloat(); - floats[i] = f; - magnitude += f * f; - } - magnitude = (float) Math.sqrt(magnitude); - if (VectorSimilarity.DOT_PRODUCT.equals(vectorFieldType.getSimilarity())) { - for (int i = 0; i < floats.length; i++) { - floats[i] /= magnitude; - } - } - yield floats; - } - case BIT -> randomByteArrayOfLength(vectorFieldType.getVectorDimensions() / 8); - }; - } - - @Override - protected IngestScriptSupport ingestScriptSupport() { - throw new AssumptionViolatedException("not 
supported"); - } - - @Override - protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) { - return new DenseVectorSyntheticSourceSupport(); - } - - @Override - protected boolean supportsEmptyInputArray() { - return false; - } - - private static class DenseVectorSyntheticSourceSupport implements SyntheticSourceSupport { - private final int dims = between(5, 1000); - private final ElementType elementType = randomFrom(ElementType.BYTE, ElementType.FLOAT, ElementType.BIT); - private final boolean indexed = randomBoolean(); - private final boolean indexOptionsSet = indexed && randomBoolean(); - - @Override - public SyntheticSourceExample example(int maxValues) throws IOException { - Object value = switch (elementType) { - case BYTE, BIT: - yield randomList(dims, dims, ESTestCase::randomByte); - case FLOAT: - yield randomList(dims, dims, ESTestCase::randomFloat); - }; - return new SyntheticSourceExample(value, value, this::mapping); - } - - private void mapping(XContentBuilder b) throws IOException { - b.field("type", "dense_vector"); - if (elementType == ElementType.BYTE || elementType == ElementType.BIT || randomBoolean()) { - b.field("element_type", elementType.toString()); - } - b.field("dims", elementType == ElementType.BIT ? dims * Byte.SIZE : dims); - if (indexed) { - b.field("index", true); - b.field("similarity", "l2_norm"); - if (indexOptionsSet) { - b.startObject("index_options"); - b.field("type", "hnsw"); - b.field("m", 5); - b.field("ef_construction", 50); - b.endObject(); - } - } else { - b.field("index", false); - } - } - - @Override - public List invalidExample() { - return List.of(); - } - } - - @Override - public void testSyntheticSourceKeepArrays() { - // The mapper expects to parse an array of values by default, it's not compatible with array of arrays. 
- } -} diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java index da510407f8774..bba622313e7ee 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java @@ -13,8 +13,8 @@ import org.elasticsearch.index.codec.CodecService; import org.elasticsearch.index.codec.LegacyPerFieldMapperCodec; import org.elasticsearch.index.codec.PerFieldMapperCodec; -import org.elasticsearch.index.mapper.AbstractDenseVectorFieldMapperTestcase; import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapperTests; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xpack.gpu.GPUPlugin; import org.elasticsearch.xpack.gpu.GPUSupport; @@ -26,7 +26,7 @@ import static org.hamcrest.Matchers.instanceOf; -public class GPUDenseVectorFieldMapperTests extends AbstractDenseVectorFieldMapperTestcase { +public class GPUDenseVectorFieldMapperTests extends DenseVectorFieldMapperTests { @Before public void setup() { @@ -39,14 +39,18 @@ protected Collection getPlugins() { return Collections.singletonList(plugin); } - public void testESGPUHnswVectorsFormat() throws IOException { + @Override + public void testKnnVectorsFormat() throws IOException { + // TODO improve test with custom parameters KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("hnsw"); String expectedStr = "ESGpuHnswVectorsFormat(name=ESGpuHnswVectorsFormat, " + "maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)"; assertEquals(expectedStr, knnVectorsFormat.toString()); } - public void testESGpuHnswScalarQuantizedVectorsFormat() throws IOException { + @Override + public void testKnnQuantizedHNSWVectorsFormat() throws 
IOException { + // TOD improve the test with custom parameters KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("int8_hnsw"); String expectedStr = "ESGPUHnswScalarQuantizedVectorsFormat(name=ESGPUHnswScalarQuantizedVectorsFormat, " + "maxConn=16, beamWidth=128, flatVectorFormat=ES814ScalarQuantizedVectorsFormat"; From 9b56b5e826ff1a0095eb897533398b93ce0a5811 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 5 Sep 2025 10:40:02 -0400 Subject: [PATCH 063/109] Add tests for int8_hnsw --- .../elasticsearch/plugin/gpu/GPUIndexIT.java | 65 ++++++-- .../rest-api-spec/test/gpu/10_hnsw.yml | 61 +++++--- .../rest-api-spec/test/gpu/20_int8_hnsw.yml | 148 ++++++++++++++++++ 3 files changed, 240 insertions(+), 34 deletions(-) create mode 100644 x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/20_int8_hnsw.yml diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java index d1eb51b1f0f00..0c7031d431012 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java @@ -24,6 +24,7 @@ import java.util.Locale; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; @LuceneTestCase.SuppressCodecs("*") // use our custom codec @@ -104,6 +105,37 @@ public void testSortedIndexReturnsSameResultsAsUnsorted() { searchResponse1.decRef(); searchResponse2.decRef(); } + + // Force merge and search again + assertNoFailures(indicesAdmin().prepareForceMerge(indexName1).get()); + assertNoFailures(indicesAdmin().prepareForceMerge(indexName2).get()); + ensureGreen(); + + var 
searchResponse3 = prepareSearch(indexName1).setSize(k) + .setFetchSource(false) + .addFetchField("my_keyword") + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null))) + .get(); + + var searchResponse4 = prepareSearch(indexName2).setSize(k) + .setFetchSource(false) + .addFetchField("my_keyword") + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null))) + .get(); + + try { + SearchHit[] hits3 = searchResponse3.getHits().getHits(); + SearchHit[] hits4 = searchResponse4.getHits().getHits(); + Assert.assertEquals(hits3.length, hits4.length); + for (int i = 0; i < hits3.length; i++) { + Assert.assertEquals(hits3[i].getId(), hits4[i].getId()); + Assert.assertEquals((String) hits3[i].field("my_keyword").getValue(), (String) hits4[i].field("my_keyword").getValue()); + Assert.assertEquals(hits3[i].getScore(), hits4[i].getScore(), 0.0001f); + } + } finally { + searchResponse3.decRef(); + searchResponse4.decRef(); + } } public void testSearchWithoutGPU() { @@ -131,23 +163,26 @@ private void createIndex(String indexName, int dims, boolean sorted) { if (sorted) { settings.put("index.sort.field", "my_keyword"); } - assertAcked(prepareCreate(indexName).setSettings(settings.build()).setMapping(String.format(Locale.ROOT, """ - { - "properties": { - "my_vector": { - "type": "dense_vector", - "dims": %d, - "similarity": "l2_norm", - "index_options": { - "type": "hnsw" - } - }, - "my_keyword": { - "type": "keyword" - } + + String type = randomFrom("hnsw", "int8_hnsw"); + String mapping = String.format(Locale.ROOT, """ + { + "properties": { + "my_vector": { + "type": "dense_vector", + "dims": %d, + "similarity": "l2_norm", + "index_options": { + "type": "%s" } + }, + "my_keyword": { + "type": "keyword" } - """, dims))); + } + } + """, dims, type); + assertAcked(prepareCreate(indexName).setSettings(settings.build()).setMapping(mapping)); ensureGreen(); } diff --git 
a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml index 978eaa354a4cd..28cce941f0916 100644 --- a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml +++ b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml @@ -14,7 +14,7 @@ properties: embedding: type: dense_vector - dims: 128 + dims: 24 similarity: l2_norm index_options: type: hnsw @@ -32,15 +32,15 @@ - index: _id: "1" - text: "First document" - embedding: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] + embedding: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] - index: _id: "2" - text: "Second document" - embedding: [0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 
0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85] + embedding: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2] - index: _id: "3" - text: "Third document" - embedding: [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + embedding: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3] - match: { errors: false } - do: @@ -51,19 +51,19 @@ - index: _id: "4" - text: "Fourth document" - embedding: [0.05, 0.12, 0.18, 0.22, 0.29, 0.33, 0.41, 0.47, 0.53, 0.59, 0.65, 0.71, 0.77, 0.83, 0.89, 0.95, 0.11, 0.17, 0.23, 0.29, 0.35, 0.41, 0.47, 0.53, 0.59, 0.65, 0.71, 0.77, 0.83, 0.89, 0.95, 0.01, 0.07, 0.13, 0.19, 0.25, 0.31, 0.37, 0.43, 0.49, 0.55, 0.61, 0.67, 0.73, 0.79, 0.85, 0.91, 0.97, 0.03, 0.09, 0.15, 0.21, 0.27, 0.33, 0.39, 0.45, 0.51, 0.57, 0.63, 0.69, 0.75, 0.81, 0.87, 0.93, 0.99, 0.05, 0.11, 0.17, 0.23, 0.29, 0.35, 0.41, 0.47, 0.53, 0.59, 0.65, 0.71, 0.77, 0.83, 0.89, 0.95, 0.01, 0.07, 0.13, 0.19, 0.25, 0.31, 0.37, 0.43, 0.49, 0.55, 0.61, 0.67, 0.73, 0.79, 0.85, 0.91, 0.97, 0.03, 0.09, 0.15, 0.21, 0.27, 0.33, 0.39, 0.45, 0.51, 0.57, 0.63, 0.69, 0.75, 0.81, 0.87, 0.93, 0.99, 0.05, 0.11, 0.17, 0.23, 0.29, 0.35, 0.41, 0.47, 
0.53, 0.59, 0.65, 0.71, 0.46] + embedding: [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4] - index: _id: "5" - text: "Fifth document" - embedding: [0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.99, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.99, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39] + embedding: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] - index: _id: "6" - text: "Sixth document" - embedding: [0.12, 0.22, 0.32, 0.42, 0.52, 0.62, 0.72, 0.82, 0.92, 0.14, 0.24, 0.34, 0.44, 0.54, 0.64, 0.74, 0.84, 0.94, 0.16, 0.26, 0.36, 0.46, 0.56, 0.66, 0.76, 0.86, 0.96, 0.18, 0.28, 0.38, 0.48, 0.58, 0.68, 0.78, 0.88, 0.98, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.99, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.29, 0.39, 0.49] + embedding: [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 
0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6] - index: _id: "7" - text: "Seventh document" - embedding: [0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.07, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.09, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.99, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.99, 0.11, 0.21, 0.31, 0.41, 0.51, 0.61, 0.71, 0.81, 0.91, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 0.90] + embedding: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] - match: { errors: false } - do: @@ -72,11 +72,11 @@ body: knn: field: embedding - query_vector: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] - k: 10 + query_vector: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] + k: 2 - - match: { hits.total.value: 7 } - - match: { 
hits.hits.0._id: "1" } + - match: { hits.hits.0._id: "7" } + - match: { hits.hits.1._id: "6" } - do: bulk: @@ -95,10 +95,32 @@ body: knn: field: embedding - query_vector: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] - k: 10 - - match: { hits.total.value: 5 } - - match: { hits.hits.0._id: "2" } + query_vector: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] + k: 2 + - match: { hits.hits.0._id: "6" } + - match: { hits.hits.1._id: "5" } + + - do: + bulk: + index: my_vectors + refresh: true + body: + - index: + _id: "6" + - text: "Sixth document" + embedding: [0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16] + - match: { errors: false } + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + k: 2 + - match: { hits.hits.0._id: "6" } + - match: { hits.hits.1._id: "2" } - do: indices.forcemerge: @@ -111,6 +133,7 @@ body: knn: field: embedding - query_vector: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 
0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] - k: 10 - - match: { hits.total.value: 5 } + query_vector: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] + k: 2 + - match: { hits.hits.0._id: "5" } + - match: { hits.hits.1._id: "4" } diff --git a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/20_int8_hnsw.yml b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/20_int8_hnsw.yml new file mode 100644 index 0000000000000..e0a6f42409b66 --- /dev/null +++ b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/20_int8_hnsw.yml @@ -0,0 +1,148 @@ +--- +"Test GPU vector operations": + + - requires: + cluster_features: [ "vectors.indexing.use_gpu" ] + reason: "A cluster should have a GPU plugin to run these tests" + + # creating an index is successful even if the GPU is not available + - do: + indices.create: + index: my_vectors + body: + mappings: + properties: + embedding: + type: dense_vector + dims: 24 + similarity: l2_norm + index_options: + type: int8_hnsw + settings: + index.number_of_shards: 1 + index.vectors.indexing.use_gpu: true + index.refresh_interval: -1 # disable automatic refresh to ensure documents are indexed together + - match: { error: null } + + - do: + bulk: + index: my_vectors + refresh: true + body: + - index: + _id: "1" + - text: "First document" + embedding: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + - index: + _id: "2" + - text: "Second document" + embedding: [0.2, 0.2, 
0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2] + - index: + _id: "3" + - text: "Third document" + embedding: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3] + - index: + _id: "4" + - text: "Fourth document" + embedding: [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4] + - index: + _id: "5" + - text: "Fifth document" + embedding: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] + - index: + _id: "6" + - text: "Sixth document" + embedding: [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6] + - index: + _id: "7" + - text: "Seventh document" + embedding: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] + - index: + _id : "8" + - text: "Eighth document" + embedding: [0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8] + - index: + _id: "9" + - text: "Ninth document" + embedding: [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9] + - index: + _id: "10" + - text: "Tenth document" + embedding: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + - match: { errors: false } + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + k: 2 + - match: { hits.hits.0._id: "10" } + - match: { hits.hits.1._id: "9" } + + - do: + bulk: + index: my_vectors + refresh: true + body: + - delete: + _id: "1" + - delete: + 
_id: "10" + - match: { errors: false } + + - do: + indices.forcemerge: + index: my_vectors + max_num_segments: 1 + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + k: 2 + - match: { hits.hits.0._id: "9" } + - match: { hits.hits.1._id: "8" } + + + - do: + bulk: + index: my_vectors + refresh: true + body: + - index: + _id: "2" + - text: "Second document" + embedding: [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + + - do: + indices.forcemerge: + index: my_vectors + max_num_segments: 1 + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + k: 2 + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "9" } + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + k: 2 + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "4" } From 42eff309f995e4484b93094ef347e05d6dc30bb3 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Sat, 6 Sep 2025 04:32:41 +0000 Subject: [PATCH 064/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index ffcc1010d8e4f..26c8a6f3cd428 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ 
b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -73672634 +2c0e124e From 34670e3df5367e826bf5f5291682657bdf0111c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?= Date: Tue, 9 Sep 2025 14:58:17 +0200 Subject: [PATCH 065/109] Fix: `ESGpuHnswVectorsWriter#mergeOneField` now supports non memory-mapped inputs (#134310) * Fix merge when no mmap available * PR comments: missing builder, flush only sees floats, logging slow path --- .../gpu/codec/ESGpuHnswVectorsWriter.java | 137 +++++++++--------- .../codec/ESGpuHnswSQVectorsFormatTests.java | 2 + 2 files changed, 73 insertions(+), 66 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index df9b47ee5c62d..099ad22c5002e 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -150,9 +150,9 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { flatVectorWriter.flush(maxDoc, sortMap); for (FieldWriter field : fields) { if (sortMap == null) { - writeField(field); + flushField(field); } else { - writeSortingField(field, sortMap); + flushSortingField(field, sortMap); } } } @@ -185,83 +185,56 @@ public long ramBytesUsed() { return total; } - private static final class DatasetOrVectors { - private final CuVSMatrix dataset; - private final float[][] vectors; - - static DatasetOrVectors fromArray(float[][] vectors) { - return new DatasetOrVectors( - vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : CuVSMatrix.ofArray(vectors), - vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? 
vectors : null - ); - } - - static DatasetOrVectors fromDataset(CuVSMatrix dataset) { - return new DatasetOrVectors(dataset, null); - } - - private DatasetOrVectors(CuVSMatrix dataset, float[][] vectors) { - this.dataset = dataset; - this.vectors = vectors; - validateState(); - } - - private void validateState() { - if ((dataset == null && vectors == null) || (dataset != null && vectors != null)) { - throw new IllegalStateException("Exactly one of dataset or vectors must be non-null"); - } - } - - int size() { - return dataset != null ? (int) dataset.size() : vectors.length; - } - - CuVSMatrix getDataset() { - return dataset; - } - - float[][] getVectors() { - return vectors; - } - } - - private void writeField(FieldWriter fieldWriter) throws IOException { + /** + * For FlatFieldVectorWriter we only need to support float[] during flush: during indexing users provide floats[], and pass floats to + * FlatFieldVectorWriter, even when we have a BYTE dataType (i.e. an "int8_hnsw" type). + * During merging, we use quantized data, so we need to support byte[] too (see {@link ESGpuHnswVectorsWriter#mergeOneField}), + * but not here. + * That's how our other current formats work: use floats during indexing, and quantized data to build graph during merging. + */ + private void flushField(FieldWriter fieldWriter) throws IOException { float[][] vectors = fieldWriter.flatFieldVectorsWriter.getVectors().toArray(float[][]::new); - writeFieldInternal(fieldWriter.fieldInfo, DatasetOrVectors.fromArray(vectors)); + try (CuVSMatrix dataset = vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? 
null : CuVSMatrix.ofArray(vectors)) { + writeFieldInternal(fieldWriter.fieldInfo, dataset, vectors.length); + } } - private void writeSortingField(FieldWriter fieldData, Sorter.DocMap sortMap) throws IOException { + private void flushSortingField(FieldWriter fieldWriter, Sorter.DocMap sortMap) throws IOException { // The flatFieldVectorsWriter's flush method, called before this, has already sorted the vectors according to the sortMap. // We can now treat them as a simple, sorted list of vectors. - float[][] vectors = fieldData.flatFieldVectorsWriter.getVectors().toArray(float[][]::new); - writeFieldInternal(fieldData.fieldInfo, DatasetOrVectors.fromArray(vectors)); + float[][] vectors = fieldWriter.flatFieldVectorsWriter.getVectors().toArray(float[][]::new); + try (CuVSMatrix dataset = vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : CuVSMatrix.ofArray(vectors)) { + writeFieldInternal(fieldWriter.fieldInfo, dataset, vectors.length); + } } - private void writeFieldInternal(FieldInfo fieldInfo, DatasetOrVectors datasetOrVectors) throws IOException { + private void writeFieldInternal(FieldInfo fieldInfo, CuVSMatrix dataset, int datasetSize) throws IOException { try { long vectorIndexOffset = vectorIndex.getFilePointer(); int[][] graphLevelNodeOffsets = new int[1][]; - HnswGraph mockGraph; - if (datasetOrVectors.getVectors() != null) { - int size = datasetOrVectors.size(); + final HnswGraph graph; + if (dataset == null) { if (logger.isDebugEnabled()) { - logger.debug("Skip building carga index; vectors length {} < {} (min for GPU)", size, MIN_NUM_VECTORS_FOR_GPU_BUILD); + logger.debug( + "Skip building carga index; vectors length {} < {} (min for GPU)", + datasetSize, + MIN_NUM_VECTORS_FOR_GPU_BUILD + ); } - mockGraph = writeGraph(size, graphLevelNodeOffsets); + graph = writeMockGraph(datasetSize, graphLevelNodeOffsets); } else { - var dataset = datasetOrVectors.getDataset(); var cuVSResources = cuVSResourceManager.acquire((int) dataset.size(), (int) 
dataset.columns(), dataset.dataType()); try { try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) { assert index != null : "GPU index should be built for field: " + fieldInfo.name; - mockGraph = writeGraph(index.getGraph(), graphLevelNodeOffsets); + graph = writeGraph(index.getGraph(), graphLevelNodeOffsets); } } finally { cuVSResourceManager.release(cuVSResources); } } long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; - writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetOrVectors.size(), mockGraph, graphLevelNodeOffsets); + writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetSize, graph, graphLevelNodeOffsets); } catch (IOException e) { throw e; } catch (Throwable t) { @@ -337,7 +310,7 @@ private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) th } // create a mock graph where every node is connected to every other node - private HnswGraph writeGraph(int elementCount, int[][] levelNodeOffsets) throws IOException { + private HnswGraph writeMockGraph(int elementCount, int[][] levelNodeOffsets) throws IOException { if (elementCount == 0) { return null; } @@ -435,20 +408,52 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE } } try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { - DatasetOrVectors datasetOrVectors; var input = FilterIndexInput.unwrapOnlyTest(in); - if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput && numVectors >= MIN_NUM_VECTORS_FOR_GPU_BUILD) { - var ds = DatasetUtils.getInstance() - .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), dataType); - datasetOrVectors = DatasetOrVectors.fromDataset(ds); + + final CuVSMatrix dataset; + if (numVectors >= MIN_NUM_VECTORS_FOR_GPU_BUILD) { + if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { + // Direct access to mmapped file + 
dataset = DatasetUtils.getInstance() + .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), dataType); + } else { + logger.debug( + () -> "Cannot mmap merged raw vectors temporary file. IndexInput type [" + input.getClass().getSimpleName() + "]" + ); + + // Read vector-by-vector + var builder = CuVSMatrix.hostBuilder(numVectors, fieldInfo.getVectorDimension(), dataType); + + // During merging, we use quantized data, so we need to support byte[] too. + // That's how our current formats work: use floats during indexing, and quantized data to build a graph during merging. + if (dataType == CuVSMatrix.DataType.FLOAT) { + float[] vector = new float[fieldInfo.getVectorDimension()]; + for (int i = 0; i < numVectors; ++i) { + input.readFloats(vector, 0, fieldInfo.getVectorDimension()); + builder.addVector(vector); + } + } else { + assert dataType == CuVSMatrix.DataType.BYTE; + byte[] vector = new byte[fieldInfo.getVectorDimension()]; + for (int i = 0; i < numVectors; ++i) { + input.readBytes(vector, 0, fieldInfo.getVectorDimension()); + builder.addVector(vector); + } + } + dataset = builder.build(); + } } else { - // assert numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD : "numVectors: " + numVectors; // we don't really need real value for vectors here, // we just build a mock graph where every node is connected to every other node - float[][] vectors = new float[numVectors][fieldInfo.getVectorDimension()]; - datasetOrVectors = DatasetOrVectors.fromArray(vectors); + dataset = null; + } + try { + writeFieldInternal(fieldInfo, dataset, numVectors); + } finally { + if (dataset != null) { + dataset.close(); + } } - writeFieldInternal(fieldInfo, datasetOrVectors); } finally { org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); } diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java 
b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java index 7c2dce8adcfec..8d639ccf58d32 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java @@ -10,11 +10,13 @@ import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.xpack.gpu.GPUSupport; import org.junit.BeforeClass; +@LuceneTestCase.SuppressSysoutChecks(bugUrl = "https://github.com/rapidsai/cuvs/issues/1310") public class ESGpuHnswSQVectorsFormatTests extends BaseKnnVectorsFormatTestCase { static { From 2af02f12058d2fcdd2c608710bbecab09a4f5387 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 9 Sep 2025 13:22:16 -0400 Subject: [PATCH 066/109] Fix TransportResumeFollowActionTests.testDynamicIndexSettingsAreClassified --- .../xpack/ccr/action/TransportResumeFollowAction.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java index b0be3e21bbc7c..3bcc5ecb87d24 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java @@ -496,6 +496,7 @@ static String[] extractLeaderShardHistoryUUIDs(Map ccrIndexMetad IndexSettings.INDEX_FLUSH_AFTER_MERGE_THRESHOLD_SIZE_SETTING, IndexSettings.INDEX_GC_DELETES_SETTING, IndexSettings.MAX_REFRESH_LISTENERS_PER_SHARD, + 
IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING, IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING, BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING, SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_FETCH_DEBUG_SETTING, From d3b8eeb5f4be6a9ae22db468b255abdb257c6462 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 9 Sep 2025 13:45:41 -0400 Subject: [PATCH 067/109] Fix failed tests in ":x-pack:plugin:esql:qa:server:multi-node:javaRestTest" --- .../index/mapper/vectors/DenseVectorFieldMapper.java | 12 ++++-------- .../mapper/vectors/DenseVectorFieldMapperTests.java | 2 +- .../inference/mapper/SemanticTextFieldMapper.java | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 7450b510c855a..53e6dddf504b5 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -3047,18 +3047,14 @@ public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultForm format = fieldType().elementType == ElementType.BIT ? 
new ES815HnswBitVectorsFormat() : defaultFormat; } else { // if plugins provided alternative KnnVectorsFormat for this indexOptions, use it instead of standard - List extraKnnFormats = new ArrayList<>(); + KnnVectorsFormat extraKnnFormat = null; for (VectorsFormatProvider vectorsFormatProvider : extraVectorsFormatProviders) { - KnnVectorsFormat extraKnnFormat = vectorsFormatProvider.getKnnVectorsFormat(indexSettings, indexOptions); + extraKnnFormat = vectorsFormatProvider.getKnnVectorsFormat(indexSettings, indexOptions); if (extraKnnFormat != null) { - extraKnnFormats.add(extraKnnFormat); + break; } } - if (extraKnnFormats.size() > 0) { - format = extraKnnFormats.get(0); - } else { - format = indexOptions.getVectorsFormat(fieldType().elementType); - } + format = extraKnnFormat != null ? extraKnnFormat : indexOptions.getVectorsFormat(fieldType().elementType); } // It's legal to reuse the same format name as this is the same on-disk format. return new KnnVectorsFormat(format.getName()) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index a1256b939e486..fc9ee6a238e8a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -2212,7 +2212,7 @@ public void testValidateOnBuild() { int dimensions = randomIntBetween(64, 1024); // Build a dense vector field mapper with float element type, which will trigger int8 HNSW index options - DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current(), false, null).elementType( + DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current(), false, List.of()).elementType( ElementType.FLOAT ).dimensions(dimensions).build(context); diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 44710c7cdebf9..e60895c2599bf 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -1266,7 +1266,7 @@ private static Mapper.Builder createEmbeddingsField( CHUNKED_EMBEDDINGS_FIELD, indexVersionCreated, false, - null + List.of() ); configureDenseVectorMapperBuilder(indexVersionCreated, denseVectorMapperBuilder, modelSettings, indexOptions); From 7917cc1c3fc81da57ca3484f1716964a4518c044 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 04:31:50 +0000 Subject: [PATCH 068/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 26c8a6f3cd428..460a915e67129 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -2c0e124e +60244ca8 From 6da63ec2ab005ad88c9be736df6ea178fc9ef4ed Mon Sep 17 00:00:00 2001 From: ldematte Date: Thu, 11 Sep 2025 14:08:40 +0200 Subject: [PATCH 069/109] (HACK) Add reflection utils to server and export them to GPU --- server/src/main/java/module-info.java | 1 + .../reflect/VectorsFormatReflectionUtils.java | 62 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java diff --git 
a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index bcab49bf78859..3bd5fdcf23a79 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -487,4 +487,5 @@ exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn, org.elasticsearch.gpu; exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn; exports org.elasticsearch.inference.telemetry; + exports org.elasticsearch.index.codec.vectors.reflect to org.elasticsearch.gpu; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java new file mode 100644 index 0000000000000..bbd2c54a74be1 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java @@ -0,0 +1,62 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.codec.vectors.reflect; + +import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter; +import org.apache.lucene.store.IndexOutput; +import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; + +public class VectorsFormatReflectionUtils { + + private static final VarHandle FLAT_VECTOR_DATA_HANDLE; + private static final VarHandle QUANTIZED_VECTOR_DATA_HANDLE; + private static final VarHandle DELEGATE_WRITER_HANDLE; + private static final VarHandle RAW_DELEGATE_WRITER_HANDLE; + + static final Class L99_SQ_VW_CLS = Lucene99ScalarQuantizedVectorsWriter.class; + static final Class L99_F_VW_CLS = Lucene99FlatVectorsWriter.class; + static final Class ES814_SQ_VW_CLS = ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter.class; + + static { + try { + var lookup = MethodHandles.privateLookupIn(L99_F_VW_CLS, MethodHandles.lookup()); + FLAT_VECTOR_DATA_HANDLE = lookup.findVarHandle(L99_F_VW_CLS, "vectorData", IndexOutput.class); + + lookup = MethodHandles.privateLookupIn(L99_SQ_VW_CLS, MethodHandles.lookup()); + QUANTIZED_VECTOR_DATA_HANDLE = lookup.findVarHandle(L99_SQ_VW_CLS, "quantizedVectorData", IndexOutput.class); + RAW_DELEGATE_WRITER_HANDLE = lookup.findVarHandle(L99_SQ_VW_CLS, "rawVectorDelegate", FlatVectorsWriter.class); + + lookup = MethodHandles.privateLookupIn(ES814_SQ_VW_CLS, MethodHandles.lookup()); + DELEGATE_WRITER_HANDLE = lookup.findVarHandle(ES814_SQ_VW_CLS, "delegate", L99_SQ_VW_CLS); + + } catch (IllegalAccessException e) { + throw new AssertionError("should not happen, check opens", e); + } catch (ReflectiveOperationException e) { + throw new AssertionError(e); + } + } + + public static IndexOutput getQuantizedVectorDataIndexOutput(FlatVectorsWriter 
flatVectorWriter) { + assert flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter; + var delegate = (Lucene99ScalarQuantizedVectorsWriter) DELEGATE_WRITER_HANDLE.get(flatVectorWriter); + var rawVectorDelegate = (Lucene99FlatVectorsWriter) RAW_DELEGATE_WRITER_HANDLE.get(delegate); + return (IndexOutput) FLAT_VECTOR_DATA_HANDLE.get(rawVectorDelegate); + } + + public static IndexOutput getVectorDataIndexOutput(FlatVectorsWriter flatVectorWriter) { + assert flatVectorWriter instanceof Lucene99FlatVectorsWriter; + return (IndexOutput) FLAT_VECTOR_DATA_HANDLE.get(flatVectorWriter); + } +} From 5849a6fc7e4dba61ec7f8d181a624e5d5feba37b Mon Sep 17 00:00:00 2001 From: ldematte Date: Thu, 11 Sep 2025 14:09:33 +0200 Subject: [PATCH 070/109] Refactor to isolate current flush logic to flushFieldsWithoutMemoryMappedFile --- .../gpu/codec/ESGpuHnswVectorsWriter.java | 146 ++++++++++++++---- 1 file changed, 117 insertions(+), 29 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index 099ad22c5002e..c897e4770bf2f 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -16,6 +16,7 @@ import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter; import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.FieldInfo; @@ -23,10 +24,13 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.MergeState; +import 
org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.FilterIndexInput; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -42,12 +46,14 @@ import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; +import org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Objects; @@ -73,6 +79,7 @@ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { private final CuVSResourceManager cuVSResourceManager; private final SegmentWriteState segmentWriteState; private final IndexOutput meta, vectorIndex; + private final IndexOutput vectorData; private final int M; private final int beamWidth; private final FlatVectorsWriter flatVectorWriter; @@ -94,8 +101,11 @@ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { this.beamWidth = beamWidth; this.flatVectorWriter = flatVectorWriter; if (flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) { + vectorData = VectorsFormatReflectionUtils.getQuantizedVectorDataIndexOutput(flatVectorWriter); dataType = CuVSMatrix.DataType.BYTE; } else { + assert flatVectorWriter instanceof Lucene99FlatVectorsWriter; + vectorData = VectorsFormatReflectionUtils.getVectorDataIndexOutput(flatVectorWriter); dataType = CuVSMatrix.DataType.FLOAT; } this.segmentWriteState = state; @@ -145,18 +155,115 @@ public KnnFieldVectorsWriter 
addField(FieldInfo fieldInfo) throws IOException return newField; } + private record FieldEntry(long vectorDataOffset, long vectorDataLength) {} + + /** + * Flushes vector data and associated data to disk. + *

+ * This method and the private helpers it calls only need to support FLOAT32. + * For FlatFieldVectorsWriter we only need to support float[] during flush: during indexing users provide float[] vectors, which are passed as floats to + * FlatFieldVectorsWriter, even when we have a BYTE dataType (i.e. an "int8_hnsw" type). + * During merging, we use quantized data, so we need to support byte[] too (see {@link ESGpuHnswVectorsWriter#mergeOneField}), + * but not here. + * That's how our other current formats work: use floats during indexing, and quantized data to build the graph during merging. + *

+ */ @Override public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { + SegmentInfo segmentInfo = segmentWriteState.segmentInfo; + var mappedFields = new HashMap(); + flatVectorWriter.flush(maxDoc, sortMap); + + var directory = FilterDirectory.unwrap(segmentWriteState.segmentInfo.dir); + logger.info( + "Directory is of type [{}], unwrapped [{}]", + segmentWriteState.segmentInfo.dir.getClass().getName(), + directory.getClass().getName() + ); + flushFieldsWithoutMemoryMappedFile(sortMap); + + // if (TODO || mappedFields.isEmpty()) { + // // No tmp file written + // flushFieldsWithoutMemoryMappedFile(sortMap); + // } else { + // // If we have written one or more fields to a tmp file, read back the file to try and mmap it + // try (IndexInput in = directory.openInput(vectorData.getName(), IOContext.DEFAULT)) { + // var input = FilterIndexInput.unwrapOnlyTest(in); + // if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { + // flushFieldsWithMemoryMappedFile(sortMap, memorySegmentAccessInput, mappedFields); + // } else { + // flushFieldsWithoutMemoryMappedFile(sortMap); + // } + // } + // } + } + + // private void flushFieldsWithMemoryMappedFile( + // Sorter.DocMap sortMap, + // MemorySegmentAccessInput memorySegmentAccessInput, + // HashMap mappedFields + // ) throws IOException { + // for (FieldWriter field : fields) { + // var fieldInfo = field.fieldInfo; + // + // var numVectors = field.flatFieldVectorsWriter.getVectors().size(); + // if (numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD) { + // // Will not be indexed on the GPU + // assert mappedFields.containsKey(fieldInfo.number) == false; + // flushField(fieldInfo, null, numVectors, sortMap); + // } else { + // var fieldEntry = mappedFields.get(fieldInfo.number); + // assert fieldEntry != null; + // + // flushField( + // fieldInfo, + // DatasetUtils.getInstance() + // .fromSlice( + // memorySegmentAccessInput, + // fieldEntry.vectorDataOffset, + // fieldEntry.vectorDataLength, + 
// numVectors, + // fieldInfo.getVectorDimension(), + // CuVSMatrix.DataType.FLOAT + // ), + // numVectors, + // sortMap + // ); + // } + // } + // } + + private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IOException { + // No tmp file written, or the file cannot be mmapped for (FieldWriter field : fields) { - if (sortMap == null) { - flushField(field); + var fieldInfo = field.fieldInfo; + + var numVectors = field.flatFieldVectorsWriter.getVectors().size(); + if (numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD) { + // Will not be indexed on the GPU + flushField(fieldInfo, null, numVectors, sortMap); } else { - flushSortingField(field, sortMap); + var builder = CuVSMatrix.hostBuilder(numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT); + for (var vector : field.flatFieldVectorsWriter.getVectors()) { + builder.addVector(vector); + } + try (var dataset = builder.build()) { + flushField(fieldInfo, dataset, numVectors, sortMap); + } } } } + private void flushField(FieldInfo fieldInfo, CuVSMatrix dataset, int numVectors, Sorter.DocMap sortMap) throws IOException { + if (sortMap == null) { + writeFieldInternal(fieldInfo, dataset, numVectors); + } else { + // TODO: use sortMap + writeFieldInternal(fieldInfo, dataset, numVectors); + } + } + @Override public void finish() throws IOException { if (finished) { @@ -185,29 +292,6 @@ public long ramBytesUsed() { return total; } - /** - * For FlatFieldVectorWriter we only need to support float[] during flush: during indexing users provide floats[], and pass floats to - * FlatFieldVectorWriter, even when we have a BYTE dataType (i.e. an "int8_hnsw" type). - * During merging, we use quantized data, so we need to support byte[] too (see {@link ESGpuHnswVectorsWriter#mergeOneField}), - * but not here. - * That's how our other current formats work: use floats during indexing, and quantized data to build graph during merging. 
- */ - private void flushField(FieldWriter fieldWriter) throws IOException { - float[][] vectors = fieldWriter.flatFieldVectorsWriter.getVectors().toArray(float[][]::new); - try (CuVSMatrix dataset = vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : CuVSMatrix.ofArray(vectors)) { - writeFieldInternal(fieldWriter.fieldInfo, dataset, vectors.length); - } - } - - private void flushSortingField(FieldWriter fieldWriter, Sorter.DocMap sortMap) throws IOException { - // The flatFieldVectorsWriter's flush method, called before this, has already sorted the vectors according to the sortMap. - // We can now treat them as a simple, sorted list of vectors. - float[][] vectors = fieldWriter.flatFieldVectorsWriter.getVectors().toArray(float[][]::new); - try (CuVSMatrix dataset = vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : CuVSMatrix.ofArray(vectors)) { - writeFieldInternal(fieldWriter.fieldInfo, dataset, vectors.length); - } - } - private void writeFieldInternal(FieldInfo fieldInfo, CuVSMatrix dataset, int datasetSize) throws IOException { try { long vectorIndexOffset = vectorIndex.getFilePointer(); @@ -384,9 +468,13 @@ public NodesIterator getNodesOnLevel(int level) { }; } + @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") + private static void deleteFilesIgnoringExceptions(Directory dir, String fileName) { + org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(dir, fileName); + } + // TODO check with deleted documents @Override - @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { flatVectorWriter.mergeOneField(fieldInfo, mergeState); final int numVectors; @@ -404,7 +492,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE success = true; } finally { if (success == false && tempRawVectorsFileName != null) { - 
org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); + deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); } } try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { @@ -455,7 +543,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE } } } finally { - org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); + deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); } } From c34be34b04cff0a5a62390cbbb2a955d751f997c Mon Sep 17 00:00:00 2001 From: ldematte Date: Thu, 11 Sep 2025 16:27:59 +0200 Subject: [PATCH 071/109] Reproduce flatVectorWriter.flush() via reflection to "instrument" it --- .../ES814ScalarQuantizedVectorsFormat.java | 2 +- .../reflect/VectorsFormatReflectionUtils.java | 138 +++++++++++++++++- .../gpu/codec/ESGpuHnswVectorsWriter.java | 73 ++++++++- 3 files changed, 201 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java index c6416a13cbb92..08237cd5ffece 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java @@ -134,7 +134,7 @@ public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException public static final class ES814ScalarQuantizedVectorsWriter extends FlatVectorsWriter { - final Lucene99ScalarQuantizedVectorsWriter delegate; + public final Lucene99ScalarQuantizedVectorsWriter delegate; ES814ScalarQuantizedVectorsWriter(Lucene99ScalarQuantizedVectorsWriter delegate) { super(delegate.getFlatVectorScorer()); diff --git 
a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java index bbd2c54a74be1..e34b4b3901b6e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java @@ -9,38 +9,81 @@ package org.elasticsearch.index.codec.vectors.reflect; +import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter; import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter; +import org.apache.lucene.index.Sorter; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.quantization.ScalarQuantizer; import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; +import java.lang.invoke.MethodHandle; import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; import java.lang.invoke.VarHandle; public class VectorsFormatReflectionUtils { private static final VarHandle FLAT_VECTOR_DATA_HANDLE; private static final VarHandle QUANTIZED_VECTOR_DATA_HANDLE; - private static final VarHandle DELEGATE_WRITER_HANDLE; private static final VarHandle RAW_DELEGATE_WRITER_HANDLE; + private static final VarHandle RAW_FIELD_DELEGATE_WRITER_HANDLE; + + private static final MethodHandle lucene99FlatVectorsWriter_writeField$mh; + private static final MethodHandle lucene99FlatVectorsWriter_writeSortingField$mh; + + private static final MethodHandle lucene99ScalarQuantizedVectorsWriter_writeField$mh; + private static final MethodHandle lucene99ScalarQuantizedVectorsWriter_writeSortingField$mh; + private static final MethodHandle lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer$mh; static final Class 
L99_SQ_VW_CLS = Lucene99ScalarQuantizedVectorsWriter.class; + static final Class L99_SQ_VW_FIELD_WRITER_CLS; static final Class L99_F_VW_CLS = Lucene99FlatVectorsWriter.class; - static final Class ES814_SQ_VW_CLS = ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter.class; + static final Class L99_F_VW_FIELD_WRITER_CLS; static { try { + L99_F_VW_FIELD_WRITER_CLS = Class.forName("org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter$FieldWriter"); + L99_SQ_VW_FIELD_WRITER_CLS = + Class.forName("org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter$FieldWriter"); var lookup = MethodHandles.privateLookupIn(L99_F_VW_CLS, MethodHandles.lookup()); FLAT_VECTOR_DATA_HANDLE = lookup.findVarHandle(L99_F_VW_CLS, "vectorData", IndexOutput.class); + lucene99FlatVectorsWriter_writeField$mh = lookup.findVirtual( + L99_F_VW_CLS, + "writeField", + MethodType.methodType(void.class, L99_F_VW_FIELD_WRITER_CLS, int.class) + ); + + lucene99FlatVectorsWriter_writeSortingField$mh = lookup.findVirtual( + L99_F_VW_CLS, + "writeSortingField", + MethodType.methodType(void.class, L99_F_VW_FIELD_WRITER_CLS, int.class, Sorter.DocMap.class) + ); lookup = MethodHandles.privateLookupIn(L99_SQ_VW_CLS, MethodHandles.lookup()); QUANTIZED_VECTOR_DATA_HANDLE = lookup.findVarHandle(L99_SQ_VW_CLS, "quantizedVectorData", IndexOutput.class); RAW_DELEGATE_WRITER_HANDLE = lookup.findVarHandle(L99_SQ_VW_CLS, "rawVectorDelegate", FlatVectorsWriter.class); - - lookup = MethodHandles.privateLookupIn(ES814_SQ_VW_CLS, MethodHandles.lookup()); - DELEGATE_WRITER_HANDLE = lookup.findVarHandle(ES814_SQ_VW_CLS, "delegate", L99_SQ_VW_CLS); - + lucene99ScalarQuantizedVectorsWriter_writeField$mh = lookup.findVirtual( + L99_SQ_VW_CLS, + "writeField", + MethodType.methodType(void.class, L99_SQ_VW_FIELD_WRITER_CLS, int.class, ScalarQuantizer.class) + ); + lucene99ScalarQuantizedVectorsWriter_writeSortingField$mh = lookup.findVirtual( + L99_SQ_VW_CLS, + "writeSortingField", + 
MethodType.methodType(void.class, L99_SQ_VW_FIELD_WRITER_CLS, int.class, Sorter.DocMap.class, ScalarQuantizer.class) + ); + RAW_FIELD_DELEGATE_WRITER_HANDLE = lookup.findVarHandle( + L99_SQ_VW_FIELD_WRITER_CLS, + "flatFieldVectorsWriter", + FlatFieldVectorsWriter.class + ); + lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer$mh = lookup.findVirtual( + L99_SQ_VW_FIELD_WRITER_CLS, + "createQuantizer", + MethodType.methodType(ScalarQuantizer.class) + ); } catch (IllegalAccessException e) { throw new AssertionError("should not happen, check opens", e); } catch (ReflectiveOperationException e) { @@ -50,8 +93,7 @@ public class VectorsFormatReflectionUtils { public static IndexOutput getQuantizedVectorDataIndexOutput(FlatVectorsWriter flatVectorWriter) { assert flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter; - var delegate = (Lucene99ScalarQuantizedVectorsWriter) DELEGATE_WRITER_HANDLE.get(flatVectorWriter); - var rawVectorDelegate = (Lucene99FlatVectorsWriter) RAW_DELEGATE_WRITER_HANDLE.get(delegate); + var rawVectorDelegate = getRawVectorDelegate((ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter)flatVectorWriter); return (IndexOutput) FLAT_VECTOR_DATA_HANDLE.get(rawVectorDelegate); } @@ -59,4 +101,84 @@ public static IndexOutput getVectorDataIndexOutput(FlatVectorsWriter flatVectorW assert flatVectorWriter instanceof Lucene99FlatVectorsWriter; return (IndexOutput) FLAT_VECTOR_DATA_HANDLE.get(flatVectorWriter); } + + // private void Lucene99FlatVectorsWriter#writeField(FieldWriter fieldData, int maxDoc) + public static void lucene99FlatVectorsWriter_writeField( + Lucene99FlatVectorsWriter that, + FlatFieldVectorsWriter fieldData, int maxDoc + ) { + try { + lucene99FlatVectorsWriter_writeField$mh.invoke(that, fieldData, maxDoc); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + // private void Lucene99FlatVectorsWriter#writeSortingField(FieldWriter fieldData, int 
maxDoc) + public static void lucene99FlatVectorsWriter_writeSortingField( + Lucene99FlatVectorsWriter that, + FlatFieldVectorsWriter fieldData, + int maxDoc, + Sorter.DocMap sortMap + ) { + try { + lucene99FlatVectorsWriter_writeSortingField$mh.invoke(that, fieldData, maxDoc, sortMap); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + // private void Lucene99FlatVectorsWriter#writeField(FieldWriter fieldData, int maxDoc) + public static void lucene99ScalarQuantizedVectorsWriter_writeField( + Lucene99ScalarQuantizedVectorsWriter that, + FlatFieldVectorsWriter fieldData, + int maxDoc, + ScalarQuantizer scalarQuantizer + ) { + try { + lucene99ScalarQuantizedVectorsWriter_writeField$mh.invoke(that, fieldData, maxDoc, scalarQuantizer); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + // private void Lucene99FlatVectorsWriter#writeSortingField(FieldWriter fieldData, int maxDoc) + public static void lucene99ScalarQuantizedVectorsWriter_writeSortingField( + Lucene99ScalarQuantizedVectorsWriter that, + FlatFieldVectorsWriter fieldData, + int maxDoc, + Sorter.DocMap sortMap, + ScalarQuantizer scalarQuantizer + ) { + try { + lucene99ScalarQuantizedVectorsWriter_writeSortingField$mh.invoke(that, fieldData, maxDoc, sortMap, scalarQuantizer); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + public static ScalarQuantizer lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer(FlatFieldVectorsWriter fieldData) { + try { + return (ScalarQuantizer) lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer$mh.invoke(fieldData); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + public static Lucene99FlatVectorsWriter getRawVectorDelegate( + ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter quantizedVectorsWriter + ) { + return (Lucene99FlatVectorsWriter) RAW_DELEGATE_WRITER_HANDLE.get(quantizedVectorsWriter.delegate); + } + + public static FlatFieldVectorsWriter 
getRawFieldVectorDelegate(FlatFieldVectorsWriter flatFieldVectorsWriter) { + if (L99_F_VW_FIELD_WRITER_CLS.isAssignableFrom(flatFieldVectorsWriter.getClass())) { + return flatFieldVectorsWriter; + } else { + assert L99_SQ_VW_FIELD_WRITER_CLS.isAssignableFrom(flatFieldVectorsWriter.getClass()); + @SuppressWarnings("unchecked") + var rawFieldVectorDelegate = (FlatFieldVectorsWriter) RAW_FIELD_DELEGATE_WRITER_HANDLE.get(flatFieldVectorsWriter); + return rawFieldVectorDelegate; + } + } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index c897e4770bf2f..ec5262a180bb9 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -44,9 +44,9 @@ import org.elasticsearch.core.IOUtils; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; +import org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; -import org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils; import java.io.IOException; import java.nio.ByteBuffer; @@ -55,11 +55,19 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter.mergeAndRecalculateQuantiles; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; +import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.getRawFieldVectorDelegate; +import static 
org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.getRawVectorDelegate; +import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.lucene99FlatVectorsWriter_writeField; +import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.lucene99FlatVectorsWriter_writeSortingField; +import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer; +import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.lucene99ScalarQuantizedVectorsWriter_writeField; +import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.lucene99ScalarQuantizedVectorsWriter_writeSortingField; import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_META_CODEC_NAME; import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_META_EXTENSION; import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME; @@ -173,7 +181,16 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { SegmentInfo segmentInfo = segmentWriteState.segmentInfo; var mappedFields = new HashMap(); - flatVectorWriter.flush(maxDoc, sortMap); + // Reproduce flatVectorWriter.flush() + if (flatVectorWriter instanceof Lucene99FlatVectorsWriter lucene99FlatVectorsWriter) { + flushLucene99FlatVectorsWriter(lucene99FlatVectorsWriter, maxDoc, sortMap, mappedFields); + } else { + assert flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter; + var quantizedVectorsWriter = (ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) flatVectorWriter; + Lucene99FlatVectorsWriter rawVectorDelegate = getRawVectorDelegate(quantizedVectorsWriter); + flushLucene99FlatVectorsWriter(rawVectorDelegate, maxDoc, sortMap, mappedFields); + 
flushLucene99ScalarQuantizedVectorsWriter(quantizedVectorsWriter, maxDoc, sortMap); + } var directory = FilterDirectory.unwrap(segmentWriteState.segmentInfo.dir); logger.info( @@ -181,9 +198,10 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { segmentWriteState.segmentInfo.dir.getClass().getName(), directory.getClass().getName() ); + flushFieldsWithoutMemoryMappedFile(sortMap); - // if (TODO || mappedFields.isEmpty()) { + // if (FsDirectoryFactory.isHybridFs(segmentWriteState.segmentInfo.dir) || mappedFields.isEmpty()) { // // No tmp file written // flushFieldsWithoutMemoryMappedFile(sortMap); // } else { @@ -199,6 +217,55 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { // } } + private void flushLucene99ScalarQuantizedVectorsWriter( + ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter quantizedVectorsWriter, + int maxDoc, + Sorter.DocMap sortMap + ) throws IOException { + for (var field : fields) { + ScalarQuantizer quantizer = lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer(field.flatFieldVectorsWriter); + if (sortMap == null) { + lucene99ScalarQuantizedVectorsWriter_writeField( + quantizedVectorsWriter.delegate, + field.flatFieldVectorsWriter, + maxDoc, + quantizer + ); + } else { + lucene99ScalarQuantizedVectorsWriter_writeSortingField( + quantizedVectorsWriter.delegate, + field.flatFieldVectorsWriter, + maxDoc, + sortMap, + quantizer + ); + } + field.flatFieldVectorsWriter.finish(); + } + } + + private void flushLucene99FlatVectorsWriter( + Lucene99FlatVectorsWriter lucene99FlatVectorsWriter, + int maxDoc, + Sorter.DocMap sortMap, + Map mappedFields + ) throws IOException { + for (var field : fields) { + FlatFieldVectorsWriter flatFieldVectorsWriter = getRawFieldVectorDelegate(field.flatFieldVectorsWriter); + + long vectorDataOffset = vectorData.alignFilePointer(Float.BYTES); + long vectorDataLength = (long) field.fieldInfo.getVectorDimension() * Float.BYTES * 
flatFieldVectorsWriter.getVectors().size(); + mappedFields.put(field.fieldInfo.number, new FieldEntry(vectorDataOffset, vectorDataLength)); + + if (sortMap == null) { + lucene99FlatVectorsWriter_writeField(lucene99FlatVectorsWriter, flatFieldVectorsWriter, maxDoc); + } else { + lucene99FlatVectorsWriter_writeSortingField(lucene99FlatVectorsWriter, flatFieldVectorsWriter, maxDoc, sortMap); + } + flatFieldVectorsWriter.finish(); + } + } + // private void flushFieldsWithMemoryMappedFile( // Sorter.DocMap sortMap, // MemorySegmentAccessInput memorySegmentAccessInput, From 10610659e545d2f2c175ade581d64c539f0efe00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?= Date: Thu, 11 Sep 2025 16:31:43 +0200 Subject: [PATCH 072/109] Using a tmp file for enabling mmap MemorySegment during flush (#134438) --- .../elasticsearch/plugin/gpu/GPUIndexIT.java | 2 +- .../xpack/gpu/codec/DatasetUtils.java | 6 +- .../xpack/gpu/codec/DatasetUtilsImpl.java | 76 +++++++- .../gpu/codec/ESGpuHnswVectorsWriter.java | 173 +++++++++++++++--- .../xpack/gpu/codec/DatasetUtilsImpl.java | 73 -------- 5 files changed, 222 insertions(+), 108 deletions(-) delete mode 100644 x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java index 0c7031d431012..535efd8c2375c 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java @@ -99,7 +99,7 @@ public void testSortedIndexReturnsSameResultsAsUnsorted() { for (int i = 0; i < hits1.length; i++) { Assert.assertEquals(hits1[i].getId(), hits2[i].getId()); Assert.assertEquals((String) hits1[i].field("my_keyword").getValue(), (String) hits2[i].field("my_keyword").getValue()); - 
Assert.assertEquals(hits1[i].getScore(), hits2[i].getScore(), 0.0001f); + Assert.assertEquals(hits1[i].getScore(), hits2[i].getScore(), 0.001f); } } finally { searchResponse1.decRef(); diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java index 84fc01ec2974f..3a9fcb2c68cd8 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java @@ -19,6 +19,10 @@ static DatasetUtils getInstance() { return DatasetUtilsImpl.getInstance(); } - /** Returns a Dataset over the float32 vectors in the input. */ + /** Returns a Dataset over the vectors of type {@code dataType} in the input. */ CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) throws IOException; + + /** Returns a Dataset over an input slice */ + CuVSMatrix fromSlice(MemorySegmentAccessInput input, long pos, long len, int numVectors, int dims, CuVSMatrix.DataType dataType) + throws IOException; } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java index 2be5d582c4cb6..0dfb0960cebbe 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java @@ -8,18 +8,86 @@ package org.elasticsearch.xpack.gpu.codec; import com.nvidia.cuvs.CuVSMatrix; +import com.nvidia.cuvs.spi.CuVSProvider; import org.apache.lucene.store.MemorySegmentAccessInput; -/** Stubb holder - never executed. 
*/ +import java.io.IOException; +import java.lang.foreign.MemorySegment; +import java.lang.invoke.MethodHandle; + public class DatasetUtilsImpl implements DatasetUtils { + private static final DatasetUtils INSTANCE = new DatasetUtilsImpl(); + + private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeMatrixBuilder(); + static DatasetUtils getInstance() { - throw new UnsupportedOperationException("should not reach here"); + return INSTANCE; + } + + static CuVSMatrix fromMemorySegment(MemorySegment memorySegment, int size, int dimensions, CuVSMatrix.DataType dataType) { + try { + return (CuVSMatrix) createDataset$mh.invokeExact(memorySegment, size, dimensions, dataType); + } catch (Throwable e) { + if (e instanceof Error err) { + throw err; + } else if (e instanceof RuntimeException re) { + throw re; + } else { + throw new RuntimeException(e); + } + } } + private DatasetUtilsImpl() {} + @Override - public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) { - throw new UnsupportedOperationException("should not reach here"); + public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) throws IOException { + if (numVectors < 0 || dims < 0) { + throwIllegalArgumentException(numVectors, dims); + } + return createCuVSMatrix(input, 0L, input.length(), numVectors, dims, dataType); + } + + @Override + public CuVSMatrix fromSlice(MemorySegmentAccessInput input, long pos, long len, int numVectors, int dims, CuVSMatrix.DataType dataType) + throws IOException { + if (pos < 0 || len < 0) { + throw new IllegalArgumentException("pos and len must be positive"); + } + return createCuVSMatrix(input, pos, len, numVectors, dims, dataType); + } + + private static CuVSMatrix createCuVSMatrix( + MemorySegmentAccessInput input, + long pos, + long len, + int numVectors, + int dims, + CuVSMatrix.DataType dataType + ) throws IOException { + 
MemorySegment ms = input.segmentSliceOrNull(pos, len); + assert ms != null; // TODO: this can be null if larger than 16GB or ... + final int byteSize = dataType == CuVSMatrix.DataType.FLOAT ? Float.BYTES : Byte.BYTES; + if (((long) numVectors * dims * byteSize) > ms.byteSize()) { + throwIllegalArgumentException(ms, numVectors, dims); + } + return fromMemorySegment(ms, numVectors, dims, dataType); + } + + static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) { + var s = "segment of size [" + ms.byteSize() + "] too small for expected " + numVectors + " float vectors of " + dims + " dims"; + throw new IllegalArgumentException(s); + } + + static void throwIllegalArgumentException(int numVectors, int dims) { + String s; + if (numVectors < 0) { + s = "negative number of vectors: " + numVectors; + } else { + s = "negative vector dims: " + dims; + } + throw new IllegalArgumentException(s); } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index 099ad22c5002e..99aac189b9ea9 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -16,6 +16,7 @@ import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter; import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.FieldInfo; @@ -23,10 +24,12 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentInfo; import 
org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.FilterIndexInput; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -48,6 +51,7 @@ import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Objects; @@ -96,6 +100,7 @@ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { if (flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) { dataType = CuVSMatrix.DataType.BYTE; } else { + assert flatVectorWriter instanceof Lucene99FlatVectorsWriter; dataType = CuVSMatrix.DataType.FLOAT; } this.segmentWriteState = state; @@ -145,16 +150,145 @@ public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException return newField; } + private record FieldEntry(long vectorDataOffset, long vectorDataLength) {} + + /** + * Flushes vector data and associated data to disk. + *

+ * This method and the private helpers it calls only need to support FLOAT32. + * For FlatFieldVectorWriter we only need to support float[] during flush: during indexing users provide floats[], and pass floats to + * FlatFieldVectorWriter, even when we have a BYTE dataType (i.e. an "int8_hnsw" type). + * During merging, we use quantized data, so we need to support byte[] too (see {@link ESGpuHnswVectorsWriter#mergeOneField}), + * but not here. + * That's how our other current formats work: use floats during indexing, and quantized data to build graph during merging. + *

+ */ @Override public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { flatVectorWriter.flush(maxDoc, sortMap); + + // save vector values to a temp file + SegmentInfo segmentInfo = segmentWriteState.segmentInfo; + var mappedFields = new HashMap(); + + String tempRawVectorsFileName = writeTmpRawVectorFile(segmentInfo, mappedFields); + + if (tempRawVectorsFileName == null || mappedFields.isEmpty()) { + // No tmp file written + flushFieldsWithoutMemoryMappedFile(sortMap); + } else { + // If we have written one or more fields to a tmp file, read back the file to try and mmap it + try (IndexInput in = segmentWriteState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { + var input = FilterIndexInput.unwrapOnlyTest(in); + if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { + flushFieldsWithMemoryMappedFile(sortMap, memorySegmentAccessInput, mappedFields); + } else { + flushFieldsWithoutMemoryMappedFile(sortMap); + } + } finally { + deleteFilesIgnoringExceptions(segmentInfo.dir, tempRawVectorsFileName); + } + } + } + + private void flushFieldsWithMemoryMappedFile( + Sorter.DocMap sortMap, + MemorySegmentAccessInput memorySegmentAccessInput, + HashMap mappedFields + ) throws IOException { for (FieldWriter field : fields) { - if (sortMap == null) { - flushField(field); + var fieldInfo = field.fieldInfo; + + var numVectors = field.flatFieldVectorsWriter.getVectors().size(); + if (numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD) { + // Will not be indexed on the GPU + assert mappedFields.containsKey(fieldInfo.number) == false; + flushField(fieldInfo, null, numVectors, sortMap); } else { - flushSortingField(field, sortMap); + var fieldEntry = mappedFields.get(fieldInfo.number); + assert fieldEntry != null; + + flushField( + fieldInfo, + DatasetUtils.getInstance() + .fromSlice( + memorySegmentAccessInput, + fieldEntry.vectorDataOffset, + fieldEntry.vectorDataLength, + numVectors, + fieldInfo.getVectorDimension(), + 
CuVSMatrix.DataType.FLOAT + ), + numVectors, + sortMap + ); + } + } + } + + private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IOException { + // No tmp file written, or the file cannot be mmapped + for (FieldWriter field : fields) { + var fieldInfo = field.fieldInfo; + + var numVectors = field.flatFieldVectorsWriter.getVectors().size(); + if (numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD) { + // Will not be indexed on the GPU + flushField(fieldInfo, null, numVectors, sortMap); + } else { + var builder = CuVSMatrix.hostBuilder(numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT); + for (var vector : field.flatFieldVectorsWriter.getVectors()) { + builder.addVector(vector); + } + try (var dataset = builder.build()) { + flushField(fieldInfo, dataset, numVectors, sortMap); + } + } + } + } + + private String writeTmpRawVectorFile(SegmentInfo segmentInfo, HashMap mappedFields) throws IOException { + var success = false; + String tempRawVectorsFileName = null; + + try (IndexOutput out = segmentInfo.dir.createTempOutput(segmentInfo.name, "vec_", IOContext.DEFAULT)) { + tempRawVectorsFileName = out.getName(); + + for (FieldWriter field : fields) { + var numVectors = field.flatFieldVectorsWriter.getVectors().size(); + if (numVectors >= MIN_NUM_VECTORS_FOR_GPU_BUILD) { + var fieldInfo = field.fieldInfo; + + long vectorDataOffset = out.alignFilePointer(Float.BYTES); + final ByteBuffer buffer = ByteBuffer.allocate(fieldInfo.getVectorDimension() * Float.BYTES) + .order(ByteOrder.LITTLE_ENDIAN); + for (var vector : field.flatFieldVectorsWriter.getVectors()) { + buffer.asFloatBuffer().put(vector); + out.writeBytes(buffer.array(), buffer.array().length); + } + long vectorDataLength = out.getFilePointer() - vectorDataOffset; + + mappedFields.put(field.fieldInfo.number, new FieldEntry(vectorDataOffset, vectorDataLength)); + } + } + + CodecUtil.writeFooter(out); + success = true; + } finally { + if (success == false && tempRawVectorsFileName 
!= null) { + deleteFilesIgnoringExceptions(segmentInfo.dir, tempRawVectorsFileName); } } + return tempRawVectorsFileName; + } + + private void flushField(FieldInfo fieldInfo, CuVSMatrix dataset, int numVectors, Sorter.DocMap sortMap) throws IOException { + if (sortMap == null) { + writeFieldInternal(fieldInfo, dataset, numVectors); + } else { + // TODO: use sortMap + writeFieldInternal(fieldInfo, dataset, numVectors); + } } @Override @@ -185,29 +319,6 @@ public long ramBytesUsed() { return total; } - /** - * For FlatFieldVectorWriter we only need to support float[] during flush: during indexing users provide floats[], and pass floats to - * FlatFieldVectorWriter, even when we have a BYTE dataType (i.e. an "int8_hnsw" type). - * During merging, we use quantized data, so we need to support byte[] too (see {@link ESGpuHnswVectorsWriter#mergeOneField}), - * but not here. - * That's how our other current formats work: use floats during indexing, and quantized data to build graph during merging. - */ - private void flushField(FieldWriter fieldWriter) throws IOException { - float[][] vectors = fieldWriter.flatFieldVectorsWriter.getVectors().toArray(float[][]::new); - try (CuVSMatrix dataset = vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? null : CuVSMatrix.ofArray(vectors)) { - writeFieldInternal(fieldWriter.fieldInfo, dataset, vectors.length); - } - } - - private void flushSortingField(FieldWriter fieldWriter, Sorter.DocMap sortMap) throws IOException { - // The flatFieldVectorsWriter's flush method, called before this, has already sorted the vectors according to the sortMap. - // We can now treat them as a simple, sorted list of vectors. - float[][] vectors = fieldWriter.flatFieldVectorsWriter.getVectors().toArray(float[][]::new); - try (CuVSMatrix dataset = vectors.length < MIN_NUM_VECTORS_FOR_GPU_BUILD ? 
null : CuVSMatrix.ofArray(vectors)) { - writeFieldInternal(fieldWriter.fieldInfo, dataset, vectors.length); - } - } - private void writeFieldInternal(FieldInfo fieldInfo, CuVSMatrix dataset, int datasetSize) throws IOException { try { long vectorIndexOffset = vectorIndex.getFilePointer(); @@ -384,9 +495,13 @@ public NodesIterator getNodesOnLevel(int level) { }; } + @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") + private static void deleteFilesIgnoringExceptions(Directory dir, String fileName) { + org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(dir, fileName); + } + // TODO check with deleted documents @Override - @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { flatVectorWriter.mergeOneField(fieldInfo, mergeState); final int numVectors; @@ -404,7 +519,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE success = true; } finally { if (success == false && tempRawVectorsFileName != null) { - org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); + deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); } } try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { @@ -455,7 +570,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE } } } finally { - org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); + deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); } } diff --git a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java deleted file mode 100644 index 
f70eee02e302d..0000000000000 --- a/x-pack/plugin/gpu/src/main21/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.gpu.codec; - -import com.nvidia.cuvs.CuVSMatrix; -import com.nvidia.cuvs.spi.CuVSProvider; - -import org.apache.lucene.store.MemorySegmentAccessInput; - -import java.io.IOException; -import java.lang.foreign.MemorySegment; -import java.lang.invoke.MethodHandle; - -public class DatasetUtilsImpl implements DatasetUtils { - - private static final DatasetUtils INSTANCE = new DatasetUtilsImpl(); - - private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeMatrixBuilder(); - - static DatasetUtils getInstance() { - return INSTANCE; - } - - static CuVSMatrix fromMemorySegment(MemorySegment memorySegment, int size, int dimensions, CuVSMatrix.DataType dataType) { - try { - return (CuVSMatrix) createDataset$mh.invokeExact(memorySegment, size, dimensions, dataType); - } catch (Throwable e) { - if (e instanceof Error err) { - throw err; - } else if (e instanceof RuntimeException re) { - throw re; - } else { - throw new RuntimeException(e); - } - } - } - - private DatasetUtilsImpl() {} - - @Override - public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) throws IOException { - if (numVectors < 0 || dims < 0) { - throwIllegalArgumentException(numVectors, dims); - } - MemorySegment ms = input.segmentSliceOrNull(0L, input.length()); - final int byteSize = dataType == CuVSMatrix.DataType.FLOAT ? Float.BYTES : Byte.BYTES; - assert ms != null; // TODO: this can be null if larger than 16GB or ... 
- if (((long) numVectors * dims * byteSize) > ms.byteSize()) { - throwIllegalArgumentException(ms, numVectors, dims); - } - return fromMemorySegment(ms, numVectors, dims, dataType); - } - - static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) { - var s = "segment of size [" + ms.byteSize() + "] too small for expected " + numVectors + " float vectors of " + dims + " dims"; - throw new IllegalArgumentException(s); - } - - static void throwIllegalArgumentException(int numVectors, int dims) { - String s; - if (numVectors < 0) { - s = "negative number of vectors: " + numVectors; - } else { - s = "negative vector dims: " + dims; - } - throw new IllegalArgumentException(s); - } -} From e9ddaa087fb1eb35ee57c6cb27b084364156c0f2 Mon Sep 17 00:00:00 2001 From: ldematte Date: Thu, 11 Sep 2025 16:43:10 +0200 Subject: [PATCH 073/109] Enable mmapped file code path --- .../gpu/codec/ESGpuHnswVectorsWriter.java | 71 ++++++++----------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index f5827d7cd3b10..892006e1d6e83 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -18,13 +18,11 @@ import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter; import org.apache.lucene.index.ByteVectorValues; -import org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.MergeState; -import org.apache.lucene.index.SegmentInfo; import 
org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; @@ -45,6 +43,7 @@ import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils; +import org.elasticsearch.index.store.FsDirectoryFactory; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -57,6 +56,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.function.Supplier; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter.mergeAndRecalculateQuantiles; @@ -87,7 +87,7 @@ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { private final CuVSResourceManager cuVSResourceManager; private final SegmentWriteState segmentWriteState; private final IndexOutput meta, vectorIndex; - private final IndexOutput vectorData; + private final Supplier vectorDataSupplier; private final int M; private final int beamWidth; private final FlatVectorsWriter flatVectorWriter; @@ -109,11 +109,11 @@ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { this.beamWidth = beamWidth; this.flatVectorWriter = flatVectorWriter; if (flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) { - vectorData = VectorsFormatReflectionUtils.getQuantizedVectorDataIndexOutput(flatVectorWriter); + vectorDataSupplier = () -> VectorsFormatReflectionUtils.getQuantizedVectorDataIndexOutput(flatVectorWriter); dataType = CuVSMatrix.DataType.BYTE; } else { assert flatVectorWriter instanceof Lucene99FlatVectorsWriter; - vectorData = VectorsFormatReflectionUtils.getVectorDataIndexOutput(flatVectorWriter); + vectorDataSupplier = () -> 
VectorsFormatReflectionUtils.getVectorDataIndexOutput(flatVectorWriter); dataType = CuVSMatrix.DataType.FLOAT; } this.segmentWriteState = state; @@ -178,43 +178,42 @@ private record FieldEntry(long vectorDataOffset, long vectorDataLength) {} */ @Override public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { - SegmentInfo segmentInfo = segmentWriteState.segmentInfo; var mappedFields = new HashMap(); + var vectorData = vectorDataSupplier.get(); // Reproduce flatVectorWriter.flush() if (flatVectorWriter instanceof Lucene99FlatVectorsWriter lucene99FlatVectorsWriter) { - flushLucene99FlatVectorsWriter(lucene99FlatVectorsWriter, maxDoc, sortMap, mappedFields); + flushLucene99FlatVectorsWriter(lucene99FlatVectorsWriter, maxDoc, sortMap, mappedFields, vectorData); } else { assert flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter; var quantizedVectorsWriter = (ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) flatVectorWriter; Lucene99FlatVectorsWriter rawVectorDelegate = getRawVectorDelegate(quantizedVectorsWriter); - flushLucene99FlatVectorsWriter(rawVectorDelegate, maxDoc, sortMap, mappedFields); + flushLucene99FlatVectorsWriter(rawVectorDelegate, maxDoc, sortMap, mappedFields, vectorData); flushLucene99ScalarQuantizedVectorsWriter(quantizedVectorsWriter, maxDoc, sortMap); } var directory = FilterDirectory.unwrap(segmentWriteState.segmentInfo.dir); - logger.info( - "Directory is of type [{}], unwrapped [{}]", - segmentWriteState.segmentInfo.dir.getClass().getName(), - directory.getClass().getName() - ); - flushFieldsWithoutMemoryMappedFile(sortMap); - - // if (FsDirectoryFactory.isHybridFs(segmentWriteState.segmentInfo.dir) || mappedFields.isEmpty()) { - // // No tmp file written - // flushFieldsWithoutMemoryMappedFile(sortMap); - // } else { - // // If we have written one or more fields to a tmp file, read back the file to try and mmap it - // try (IndexInput in = 
directory.openInput(vectorData.getName(), IOContext.DEFAULT)) { - // var input = FilterIndexInput.unwrapOnlyTest(in); - // if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { - // flushFieldsWithMemoryMappedFile(sortMap, memorySegmentAccessInput, mappedFields); - // } else { - // flushFieldsWithoutMemoryMappedFile(sortMap); - // } - // } - // } + if (FsDirectoryFactory.isHybridFs(segmentWriteState.segmentInfo.dir) == false || mappedFields.isEmpty()) { + // Not mappable, or no mapped fields flushed + logger.info("Flush: directory does not support mmap (class [{}])", directory); + flushFieldsWithoutMemoryMappedFile(sortMap); + } else { + // If we have written one or more fields to the raw vectorData file, read it back and try to mmap it + // TODO: this does not work; we open it and the file is (still) empty, possibly we are not flushing + // "hard enough". We _could_ get a NRT directory reader using DirectoryReader.open(IndexWriter, false, false); + // and then directoryReader.directory().openInput(), but a NRT directory reader looks way overkill here + try (IndexInput in = directory.openInput(vectorData.getName(), IOContext.DEFAULT)) { + var input = FilterIndexInput.unwrapOnlyTest(in); + if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { + logger.info("Flush: Using mmaped raw vectorData"); + flushFieldsWithMemoryMappedFile(sortMap, memorySegmentAccessInput, mappedFields); + } else { + logger.info("Flush: input is not mmappable (class [{}])", input.getClass()); + flushFieldsWithoutMemoryMappedFile(sortMap); + } + } + } } private void flushLucene99ScalarQuantizedVectorsWriter( @@ -248,7 +247,8 @@ private void flushLucene99FlatVectorsWriter( Lucene99FlatVectorsWriter lucene99FlatVectorsWriter, int maxDoc, Sorter.DocMap sortMap, - Map mappedFields + Map mappedFields, + IndexOutput vectorData ) throws IOException { for (var field : fields) { FlatFieldVectorsWriter flatFieldVectorsWriter = 
getRawFieldVectorDelegate(field.flatFieldVectorsWriter); @@ -619,12 +619,7 @@ private ByteVectorValues getMergedByteVectorValues(FieldInfo fieldInfo, MergeSta final byte bits = 7; final Float confidenceInterval = null; ScalarQuantizer quantizer = mergeAndRecalculateQuantiles(mergeState, fieldInfo, confidenceInterval, bits); - MergedQuantizedVectorValues byteVectorValues = MergedQuantizedVectorValues.mergeQuantizedByteVectorValues( - fieldInfo, - mergeState, - quantizer - ); - return byteVectorValues; + return MergedQuantizedVectorValues.mergeQuantizedByteVectorValues(fieldInfo, mergeState, quantizer); } private static int writeByteVectorValues(IndexOutput out, ByteVectorValues vectorValues) throws IOException { @@ -757,10 +752,6 @@ public void addValue(int docID, float[] vectorValue) throws IOException { lastDocID = docID; } - public DocsWithFieldSet getDocsWithFieldSet() { - return flatFieldVectorsWriter.getDocsWithFieldSet(); - } - @Override public float[] copyValue(float[] vectorValue) { throw new UnsupportedOperationException(); From c051873f510bb944e022168390edbf05a22215a6 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Fri, 12 Sep 2025 04:31:46 +0000 Subject: [PATCH 074/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 460a915e67129..674bb0c30c007 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -60244ca8 +607e9ebc From 367080ed623e48aa97ef8e43f5f6f4c26fd994c3 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Sun, 14 
Sep 2025 04:32:07 +0000 Subject: [PATCH 075/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 674bb0c30c007..4d16f39fee450 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -607e9ebc +e292a09e From 2839d5be462de24624372f66a3cf3516847bea31 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Tue, 16 Sep 2025 04:32:48 +0000 Subject: [PATCH 076/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 4d16f39fee450..d9f0fbb8248ca 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -e292a09e +757b02a4 From 13fbf991ae504657610564609a204343c0716d33 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Wed, 17 Sep 2025 04:41:58 +0000 Subject: [PATCH 077/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index d9f0fbb8248ca..3bc409766fe3d 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 
@@ -757b02a4 +587f9753 From 6a3743a9ad60a75aac567842e7322bd361d5ed3e Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Wed, 17 Sep 2025 14:15:40 -0400 Subject: [PATCH 078/109] Use elastic-cuvs-java as the cuvs-java artifact name, until we switch to the official one --- .buildkite/scripts/cuvs-snapshot/configure.sh | 4 ++-- gradle/verification-metadata.xml | 6 +++--- x-pack/plugin/gpu/build.gradle | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh index 32e83ee9f7246..241d5f78900e0 100755 --- a/.buildkite/scripts/cuvs-snapshot/configure.sh +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -36,8 +36,8 @@ LD_LIBRARY_PATH="$CUVS_DIR/libcuvs/linux-x64:$LD_LIBRARY_PATH" export LD_LIBRARY_PATH cd "$CUVS_DIR/cuvs-java/target" -mvn install:install-file -Dfile="cuvs-java-$CUVS_VERSION.jar" +mvn install:install-file -Dfile="cuvs-java-$CUVS_VERSION.jar" -DartifactId=elastic-cuvs-java -DgeneratePom=true cd "$ELASTICSEARCH_REPO_DIR" PLUGIN_GRADLE_FILE=x-pack/plugin/gpu/build.gradle -sed -i "s|implementation 'com.nvidia.cuvs:cuvs-java:.*'|implementation 'com.nvidia.cuvs:cuvs-java:$CUVS_VERSION'|" "$PLUGIN_GRADLE_FILE" +sed -i "s|implementation 'com.nvidia.cuvs:elastic-cuvs-java:.*'|implementation 'com.nvidia.cuvs:elastic-cuvs-java:$CUVS_VERSION'|" "$PLUGIN_GRADLE_FILE" diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 11696268ccf03..f888aab908bb4 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -17,7 +17,7 @@ - + @@ -1197,8 +1197,8 @@ - - + + diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle index 74da7d119444f..9d45495e0227d 100644 --- a/x-pack/plugin/gpu/build.gradle +++ b/x-pack/plugin/gpu/build.gradle @@ -20,7 +20,7 @@ repositories { dependencies { compileOnly project(path: xpackModule('core')) compileOnly project(':server') - 
implementation('com.nvidia.cuvs:cuvs-java:25.10.0') { + implementation('com.nvidia.cuvs:elastic-cuvs-java:25.10.0') { changing = true // Ensure that we get updates even when the version number doesn't change. We can remove this once things stabilize } testImplementation(testArtifact(project(xpackModule('core')))) From d954f358d27904ddd7c1c22121e1e28d30c1164e Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Thu, 18 Sep 2025 04:31:44 +0000 Subject: [PATCH 079/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 3bc409766fe3d..bdde663ce81e0 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -587f9753 +5492a061 From ac3b4e5c02e22ab7edd15b20402e0172d9ce405e Mon Sep 17 00:00:00 2001 From: ldematte Date: Thu, 18 Sep 2025 09:56:02 +0200 Subject: [PATCH 080/109] Fix: signature changes + forbiddenApis + license file names --- .../reflect/VectorsFormatReflectionUtils.java | 22 +++++++++++-------- ...ENSE.txt => elastic-cuvs-java-LICENSE.txt} | 0 ...OTICE.txt => elastic-cuvs-java-NOTICE.txt} | 0 .../elasticsearch/plugin/gpu/GPUIndexIT.java | 10 ++++----- 4 files changed, 18 insertions(+), 14 deletions(-) rename x-pack/plugin/gpu/licenses/{cuvs-java-LICENSE.txt => elastic-cuvs-java-LICENSE.txt} (100%) rename x-pack/plugin/gpu/licenses/{cuvs-java-NOTICE.txt => elastic-cuvs-java-NOTICE.txt} (100%) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java index e34b4b3901b6e..3ffd0a92b2ebc 
100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java @@ -45,8 +45,9 @@ public class VectorsFormatReflectionUtils { static { try { L99_F_VW_FIELD_WRITER_CLS = Class.forName("org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter$FieldWriter"); - L99_SQ_VW_FIELD_WRITER_CLS = - Class.forName("org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter$FieldWriter"); + L99_SQ_VW_FIELD_WRITER_CLS = Class.forName( + "org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter$FieldWriter" + ); var lookup = MethodHandles.privateLookupIn(L99_F_VW_CLS, MethodHandles.lookup()); FLAT_VECTOR_DATA_HANDLE = lookup.findVarHandle(L99_F_VW_CLS, "vectorData", IndexOutput.class); lucene99FlatVectorsWriter_writeField$mh = lookup.findVirtual( @@ -93,7 +94,9 @@ public class VectorsFormatReflectionUtils { public static IndexOutput getQuantizedVectorDataIndexOutput(FlatVectorsWriter flatVectorWriter) { assert flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter; - var rawVectorDelegate = getRawVectorDelegate((ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter)flatVectorWriter); + var rawVectorDelegate = getRawVectorDelegate( + (ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) flatVectorWriter + ); return (IndexOutput) FLAT_VECTOR_DATA_HANDLE.get(rawVectorDelegate); } @@ -105,10 +108,11 @@ public static IndexOutput getVectorDataIndexOutput(FlatVectorsWriter flatVectorW // private void Lucene99FlatVectorsWriter#writeField(FieldWriter fieldData, int maxDoc) public static void lucene99FlatVectorsWriter_writeField( Lucene99FlatVectorsWriter that, - FlatFieldVectorsWriter fieldData, int maxDoc + FlatFieldVectorsWriter fieldData, + int maxDoc ) { try { - lucene99FlatVectorsWriter_writeField$mh.invoke(that, fieldData, maxDoc); 
+ lucene99FlatVectorsWriter_writeField$mh.invokeExact(that, fieldData, maxDoc); } catch (Throwable e) { throw new RuntimeException(e); } @@ -122,7 +126,7 @@ public static void lucene99FlatVectorsWriter_writeSortingField( Sorter.DocMap sortMap ) { try { - lucene99FlatVectorsWriter_writeSortingField$mh.invoke(that, fieldData, maxDoc, sortMap); + lucene99FlatVectorsWriter_writeSortingField$mh.invokeExact(that, fieldData, maxDoc, sortMap); } catch (Throwable e) { throw new RuntimeException(e); } @@ -136,7 +140,7 @@ public static void lucene99ScalarQuantizedVectorsWriter_writeField( ScalarQuantizer scalarQuantizer ) { try { - lucene99ScalarQuantizedVectorsWriter_writeField$mh.invoke(that, fieldData, maxDoc, scalarQuantizer); + lucene99ScalarQuantizedVectorsWriter_writeField$mh.invokeExact(that, fieldData, maxDoc, scalarQuantizer); } catch (Throwable e) { throw new RuntimeException(e); } @@ -151,7 +155,7 @@ public static void lucene99ScalarQuantizedVectorsWriter_writeSortingField( ScalarQuantizer scalarQuantizer ) { try { - lucene99ScalarQuantizedVectorsWriter_writeSortingField$mh.invoke(that, fieldData, maxDoc, sortMap, scalarQuantizer); + lucene99ScalarQuantizedVectorsWriter_writeSortingField$mh.invokeExact(that, fieldData, maxDoc, sortMap, scalarQuantizer); } catch (Throwable e) { throw new RuntimeException(e); } @@ -159,7 +163,7 @@ public static void lucene99ScalarQuantizedVectorsWriter_writeSortingField( public static ScalarQuantizer lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer(FlatFieldVectorsWriter fieldData) { try { - return (ScalarQuantizer) lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer$mh.invoke(fieldData); + return (ScalarQuantizer) lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer$mh.invokeExact(fieldData); } catch (Throwable e) { throw new RuntimeException(e); } diff --git a/x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt b/x-pack/plugin/gpu/licenses/elastic-cuvs-java-LICENSE.txt similarity index 100% 
rename from x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt rename to x-pack/plugin/gpu/licenses/elastic-cuvs-java-LICENSE.txt diff --git a/x-pack/plugin/gpu/licenses/cuvs-java-NOTICE.txt b/x-pack/plugin/gpu/licenses/elastic-cuvs-java-NOTICE.txt similarity index 100% rename from x-pack/plugin/gpu/licenses/cuvs-java-NOTICE.txt rename to x-pack/plugin/gpu/licenses/elastic-cuvs-java-NOTICE.txt diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java index 535efd8c2375c..00b56c081947a 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java @@ -83,13 +83,13 @@ public void testSortedIndexReturnsSameResultsAsUnsorted() { var searchResponse1 = prepareSearch(indexName1).setSize(k) .setFetchSource(false) .addFetchField("my_keyword") - .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null))) + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))) .get(); var searchResponse2 = prepareSearch(indexName2).setSize(k) .setFetchSource(false) .addFetchField("my_keyword") - .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null))) + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))) .get(); try { @@ -114,13 +114,13 @@ public void testSortedIndexReturnsSameResultsAsUnsorted() { var searchResponse3 = prepareSearch(indexName1).setSize(k) .setFetchSource(false) .addFetchField("my_keyword") - .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null))) + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))) .get(); var 
searchResponse4 = prepareSearch(indexName2).setSize(k) .setFetchSource(false) .addFetchField("my_keyword") - .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null))) + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))) .get(); try { @@ -206,7 +206,7 @@ private void assertSearch(String indexName, float[] queryVector, int totalDocs) prepareSearch(indexName).setSize(k) .setFetchSource(false) .addFetchField("my_keyword") - .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null))), + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))), response -> { assertEquals("Expected k hits to be returned", k, response.getHits().getHits().length); } From fcd49ce5e6e150659d6c1482beb8fd940ffac1b5 Mon Sep 17 00:00:00 2001 From: ldematte Date: Thu, 18 Sep 2025 11:11:07 +0200 Subject: [PATCH 081/109] Revert "Enable mmapped file code path" This reverts commit e9ddaa087fb1eb35ee57c6cb27b084364156c0f2. 
--- .../gpu/codec/ESGpuHnswVectorsWriter.java | 60 ++++++++----------- 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index 892006e1d6e83..ec27dd99bd673 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -18,11 +18,13 @@ import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter; import org.apache.lucene.index.ByteVectorValues; +import org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; @@ -43,7 +45,6 @@ import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils; -import org.elasticsearch.index.store.FsDirectoryFactory; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -56,7 +57,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.function.Supplier; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter.mergeAndRecalculateQuantiles; @@ -87,7 +87,7 @@ final class ESGpuHnswVectorsWriter extends 
KnnVectorsWriter { private final CuVSResourceManager cuVSResourceManager; private final SegmentWriteState segmentWriteState; private final IndexOutput meta, vectorIndex; - private final Supplier vectorDataSupplier; + private final IndexOutput vectorData; private final int M; private final int beamWidth; private final FlatVectorsWriter flatVectorWriter; @@ -109,11 +109,11 @@ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { this.beamWidth = beamWidth; this.flatVectorWriter = flatVectorWriter; if (flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) { - vectorDataSupplier = () -> VectorsFormatReflectionUtils.getQuantizedVectorDataIndexOutput(flatVectorWriter); + vectorData = VectorsFormatReflectionUtils.getQuantizedVectorDataIndexOutput(flatVectorWriter); dataType = CuVSMatrix.DataType.BYTE; } else { assert flatVectorWriter instanceof Lucene99FlatVectorsWriter; - vectorDataSupplier = () -> VectorsFormatReflectionUtils.getVectorDataIndexOutput(flatVectorWriter); + vectorData = VectorsFormatReflectionUtils.getVectorDataIndexOutput(flatVectorWriter); dataType = CuVSMatrix.DataType.FLOAT; } this.segmentWriteState = state; @@ -178,42 +178,22 @@ private record FieldEntry(long vectorDataOffset, long vectorDataLength) {} */ @Override public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { - var mappedFields = new HashMap(); - var vectorData = vectorDataSupplier.get(); + // var mappedFields = new HashMap(); + // flatVectorWriterFlush(maxDoc, sortMap, mappedFields); + flatVectorWriter.flush(maxDoc, sortMap); + flushFieldsWithoutMemoryMappedFile(sortMap); + } - // Reproduce flatVectorWriter.flush() + private void flatVectorWriterFlush(int maxDoc, Sorter.DocMap sortMap, HashMap mappedFields) throws IOException { if (flatVectorWriter instanceof Lucene99FlatVectorsWriter lucene99FlatVectorsWriter) { - flushLucene99FlatVectorsWriter(lucene99FlatVectorsWriter, maxDoc, sortMap, mappedFields, vectorData); + 
flushLucene99FlatVectorsWriter(lucene99FlatVectorsWriter, maxDoc, sortMap, mappedFields); } else { assert flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter; var quantizedVectorsWriter = (ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) flatVectorWriter; Lucene99FlatVectorsWriter rawVectorDelegate = getRawVectorDelegate(quantizedVectorsWriter); - flushLucene99FlatVectorsWriter(rawVectorDelegate, maxDoc, sortMap, mappedFields, vectorData); + flushLucene99FlatVectorsWriter(rawVectorDelegate, maxDoc, sortMap, mappedFields); flushLucene99ScalarQuantizedVectorsWriter(quantizedVectorsWriter, maxDoc, sortMap); } - - var directory = FilterDirectory.unwrap(segmentWriteState.segmentInfo.dir); - - if (FsDirectoryFactory.isHybridFs(segmentWriteState.segmentInfo.dir) == false || mappedFields.isEmpty()) { - // Not mappable, or no mapped fields flushed - logger.info("Flush: directory does not support mmap (class [{}])", directory); - flushFieldsWithoutMemoryMappedFile(sortMap); - } else { - // If we have written one or more fields to the raw vectorData file, read it back and try to mmap it - // TODO: this does not work; we open it and the file is (still) empty, possibly we are not flushing - // "hard enough". 
We _could_ get a NRT directory reader using DirectoryReader.open(IndexWriter, false, false); - // and then directoryReader.directory().openInput(), but a NRT directory reader looks way overkill here - try (IndexInput in = directory.openInput(vectorData.getName(), IOContext.DEFAULT)) { - var input = FilterIndexInput.unwrapOnlyTest(in); - if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { - logger.info("Flush: Using mmaped raw vectorData"); - flushFieldsWithMemoryMappedFile(sortMap, memorySegmentAccessInput, mappedFields); - } else { - logger.info("Flush: input is not mmappable (class [{}])", input.getClass()); - flushFieldsWithoutMemoryMappedFile(sortMap); - } - } - } } private void flushLucene99ScalarQuantizedVectorsWriter( @@ -247,8 +227,7 @@ private void flushLucene99FlatVectorsWriter( Lucene99FlatVectorsWriter lucene99FlatVectorsWriter, int maxDoc, Sorter.DocMap sortMap, - Map mappedFields, - IndexOutput vectorData + Map mappedFields ) throws IOException { for (var field : fields) { FlatFieldVectorsWriter flatFieldVectorsWriter = getRawFieldVectorDelegate(field.flatFieldVectorsWriter); @@ -619,7 +598,12 @@ private ByteVectorValues getMergedByteVectorValues(FieldInfo fieldInfo, MergeSta final byte bits = 7; final Float confidenceInterval = null; ScalarQuantizer quantizer = mergeAndRecalculateQuantiles(mergeState, fieldInfo, confidenceInterval, bits); - return MergedQuantizedVectorValues.mergeQuantizedByteVectorValues(fieldInfo, mergeState, quantizer); + MergedQuantizedVectorValues byteVectorValues = MergedQuantizedVectorValues.mergeQuantizedByteVectorValues( + fieldInfo, + mergeState, + quantizer + ); + return byteVectorValues; } private static int writeByteVectorValues(IndexOutput out, ByteVectorValues vectorValues) throws IOException { @@ -752,6 +736,10 @@ public void addValue(int docID, float[] vectorValue) throws IOException { lastDocID = docID; } + public DocsWithFieldSet getDocsWithFieldSet() { + return 
flatFieldVectorsWriter.getDocsWithFieldSet(); + } + @Override public float[] copyValue(float[] vectorValue) { throw new UnsupportedOperationException(); From 9e9decae9379a4c1840b0b0c233372a6a2fff253 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Fri, 19 Sep 2025 04:34:11 +0000 Subject: [PATCH 082/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index bdde663ce81e0..fd1a4e7ffb430 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -5492a061 +be6024d0 From 8e102b1336e942d74c3da8b200796d28c916c011 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Sat, 20 Sep 2025 04:32:33 +0000 Subject: [PATCH 083/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index fd1a4e7ffb430..75de90e35e663 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -be6024d0 +d02e0fc1 From 48ebb1fbb05f47c2e727bf5b29042b70559ab89b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?= Date: Mon, 22 Sep 2025 07:53:56 +0200 Subject: [PATCH 084/109] Expand gpu support checks (#135000) This PR extends GPUSupport to check for GPU presence and compatibility too. 
It uses SPI to inject a test GPUInformationProvider to mock the presence/absence and type of GPU. It also ensures GPUSupport is used in all test suites where the presence of a GPU/of cuvs is needed for the test to run correctly. The PR also adds extensive tests around GPUPlugin initialization, to ensure we get the desired on/off behaviour with respect to feature flag setting, index settings, index type and GPU support. --- .../vectors/DenseVectorFieldTypeTests.java | 28 +- x-pack/plugin/gpu/build.gradle | 1 + .../plugin/gpu/CuVSProviderDelegate.java | 112 ++++++++ .../elasticsearch/plugin/gpu/GPUIndexIT.java | 11 +- .../plugin/gpu/GPUPluginInitializationIT.java | 264 ++++++++++++++++++ .../com.nvidia.cuvs.spi.CuVSServiceProvider | 8 + .../elasticsearch/xpack/gpu/GPUSupport.java | 71 ++++- .../xpack/gpu/codec/CuVSResourceManager.java | 25 +- .../gpu/codec/ESGpuHnswSQVectorsFormat.java | 13 +- .../gpu/codec/ESGpuHnswVectorsFormat.java | 19 +- .../gpu/codec/ESGpuHnswVectorsWriter.java | 2 - .../codec/ESGpuHnswSQVectorsFormatTests.java | 5 +- .../codec/ESGpuHnswVectorsFormatTests.java | 5 +- .../codec/GPUDenseVectorFieldMapperTests.java | 6 +- .../xpack/gpu/GPUClientYamlTestSuiteIT.java | 28 +- 15 files changed, 554 insertions(+), 44 deletions(-) create mode 100644 x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/CuVSProviderDelegate.java create mode 100644 x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java create mode 100644 x-pack/plugin/gpu/src/internalClusterTest/resources/META-INF/services/com.nvidia.cuvs.spi.CuVSServiceProvider diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java index 82ac6a76f5adf..d26871f33ca6d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java @@ -65,13 +65,39 @@ private static DenseVectorFieldMapper.RescoreVector randomRescoreVector() { return new DenseVectorFieldMapper.RescoreVector(randomBoolean() ? 0 : randomFloatBetween(1.0F, 10.0F, false)); } - private DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsNonQuantized() { + private static DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsNonQuantized() { return randomFrom( new DenseVectorFieldMapper.HnswIndexOptions(randomIntBetween(1, 100), randomIntBetween(1, 10_000)), new DenseVectorFieldMapper.FlatIndexOptions() ); } + public static DenseVectorFieldMapper.DenseVectorIndexOptions randomFlatIndexOptions() { + return randomFrom( + new DenseVectorFieldMapper.FlatIndexOptions(), + new DenseVectorFieldMapper.Int8FlatIndexOptions( + randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)), + randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()) + ), + new DenseVectorFieldMapper.Int4FlatIndexOptions( + randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)), + randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()) + ) + ); + } + + public static DenseVectorFieldMapper.DenseVectorIndexOptions randomGpuSupportedIndexOptions() { + return randomFrom( + new DenseVectorFieldMapper.HnswIndexOptions(randomIntBetween(1, 100), randomIntBetween(1, 3199)), + new DenseVectorFieldMapper.Int8HnswIndexOptions( + randomIntBetween(1, 100), + randomIntBetween(1, 3199), + randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)), + randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()) + ) + ); + } + public static DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsAll() { List options = new ArrayList<>( Arrays.asList( diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle index 
9d45495e0227d..1ea64f1f1e73d 100644 --- a/x-pack/plugin/gpu/build.gradle +++ b/x-pack/plugin/gpu/build.gradle @@ -25,6 +25,7 @@ dependencies { } testImplementation(testArtifact(project(xpackModule('core')))) testImplementation(testArtifact(project(':server'))) + yamlRestTestImplementation(project(xpackModule('gpu'))) clusterModules project(xpackModule('gpu')) } diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/CuVSProviderDelegate.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/CuVSProviderDelegate.java new file mode 100644 index 0000000000000..d0f8e85ef6070 --- /dev/null +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/CuVSProviderDelegate.java @@ -0,0 +1,112 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.plugin.gpu; + +import com.nvidia.cuvs.BruteForceIndex; +import com.nvidia.cuvs.CagraIndex; +import com.nvidia.cuvs.CuVSDeviceMatrix; +import com.nvidia.cuvs.CuVSHostMatrix; +import com.nvidia.cuvs.CuVSMatrix; +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.GPUInfoProvider; +import com.nvidia.cuvs.HnswIndex; +import com.nvidia.cuvs.TieredIndex; +import com.nvidia.cuvs.spi.CuVSProvider; + +import java.lang.invoke.MethodHandle; +import java.nio.file.Path; + +class CuVSProviderDelegate implements CuVSProvider { + private final CuVSProvider delegate; + + CuVSProviderDelegate(CuVSProvider delegate) { + this.delegate = delegate; + } + + @Override + public CuVSResources newCuVSResources(Path path) throws Throwable { + return delegate.newCuVSResources(path); + } + + @Override + public CuVSMatrix.Builder newHostMatrixBuilder(long l, long l1, CuVSMatrix.DataType dataType) { + return delegate.newHostMatrixBuilder(l, l1, dataType); + } + + @Override + public CuVSMatrix.Builder newDeviceMatrixBuilder( + CuVSResources cuVSResources, + long l, + long l1, + CuVSMatrix.DataType dataType + ) { + return delegate.newDeviceMatrixBuilder(cuVSResources, l, l1, dataType); + } + + @Override + public CuVSMatrix.Builder newDeviceMatrixBuilder( + CuVSResources cuVSResources, + long l, + long l1, + int i, + int i1, + CuVSMatrix.DataType dataType + ) { + return delegate.newDeviceMatrixBuilder(cuVSResources, l, l1, i, i1, dataType); + } + + @Override + public MethodHandle newNativeMatrixBuilder() { + return delegate.newNativeMatrixBuilder(); + } + + @Override + public CuVSMatrix newMatrixFromArray(float[][] floats) { + return delegate.newMatrixFromArray(floats); + } + + @Override + public CuVSMatrix newMatrixFromArray(int[][] ints) { + return delegate.newMatrixFromArray(ints); + } + + @Override + public CuVSMatrix newMatrixFromArray(byte[][] bytes) { + return delegate.newMatrixFromArray(bytes); + } + + @Override + public BruteForceIndex.Builder 
newBruteForceIndexBuilder(CuVSResources cuVSResources) throws UnsupportedOperationException { + return delegate.newBruteForceIndexBuilder(cuVSResources); + } + + @Override + public CagraIndex.Builder newCagraIndexBuilder(CuVSResources cuVSResources) throws UnsupportedOperationException { + return delegate.newCagraIndexBuilder(cuVSResources); + } + + @Override + public HnswIndex.Builder newHnswIndexBuilder(CuVSResources cuVSResources) throws UnsupportedOperationException { + return delegate.newHnswIndexBuilder(cuVSResources); + } + + @Override + public TieredIndex.Builder newTieredIndexBuilder(CuVSResources cuVSResources) throws UnsupportedOperationException { + return delegate.newTieredIndexBuilder(cuVSResources); + } + + @Override + public CagraIndex mergeCagraIndexes(CagraIndex[] cagraIndices) throws Throwable { + return delegate.mergeCagraIndexes(cagraIndices); + } + + @Override + public GPUInfoProvider gpuInfoProvider() { + return delegate.gpuInfoProvider(); + } +} diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java index 00b56c081947a..5d2ad832c657b 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java @@ -18,6 +18,7 @@ import org.elasticsearch.xpack.gpu.GPUPlugin; import org.elasticsearch.xpack.gpu.GPUSupport; import org.junit.Assert; +import org.junit.BeforeClass; import java.util.Collection; import java.util.List; @@ -35,8 +36,12 @@ protected Collection> nodePlugins() { return List.of(GPUPlugin.class); } - public void testBasic() { + @BeforeClass + public static void checkGPUSupport() { assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); + } + + public void testBasic() { String indexName = "index1"; final int dims = randomIntBetween(4, 128); final int[] 
numDocs = new int[] { randomIntBetween(1, 100), 1, 2, randomIntBetween(1, 100) }; @@ -51,7 +56,6 @@ public void testBasic() { } public void testSortedIndexReturnsSameResultsAsUnsorted() { - assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); String indexName1 = "index_unsorted"; String indexName2 = "index_sorted"; final int dims = randomIntBetween(4, 128); @@ -130,7 +134,7 @@ public void testSortedIndexReturnsSameResultsAsUnsorted() { for (int i = 0; i < hits3.length; i++) { Assert.assertEquals(hits3[i].getId(), hits4[i].getId()); Assert.assertEquals((String) hits3[i].field("my_keyword").getValue(), (String) hits4[i].field("my_keyword").getValue()); - Assert.assertEquals(hits3[i].getScore(), hits4[i].getScore(), 0.0001f); + Assert.assertEquals(hits3[i].getScore(), hits4[i].getScore(), 0.01f); } } finally { searchResponse3.decRef(); @@ -139,7 +143,6 @@ public void testSortedIndexReturnsSameResultsAsUnsorted() { } public void testSearchWithoutGPU() { - assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); String indexName = "index1"; final int dims = randomIntBetween(4, 128); final int numDocs = randomIntBetween(1, 500); diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java new file mode 100644 index 0000000000000..322bf286feeac --- /dev/null +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java @@ -0,0 +1,264 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.plugin.gpu; + +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.CuVSResourcesInfo; +import com.nvidia.cuvs.GPUInfo; +import com.nvidia.cuvs.GPUInfoProvider; +import com.nvidia.cuvs.spi.CuVSProvider; +import com.nvidia.cuvs.spi.CuVSServiceProvider; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldTypeTests; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.xpack.gpu.GPUPlugin; +import org.junit.After; + +import java.util.Collection; +import java.util.List; +import java.util.function.Function; + +import static org.elasticsearch.index.IndexSettingsTests.newIndexMeta; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.startsWith; + +public class GPUPluginInitializationIT extends ESIntegTestCase { + + private static final Function SUPPORTED_GPU_PROVIDER = + p -> new TestCuVSServiceProvider.TestGPUInfoProvider( + List.of( + new GPUInfo( + 0, + "TestGPU", + 8 * 1024 * 1024 * 1024L, + GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR, + GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR, + true, + true + ) + ) + ); + + private static final Function NO_GPU_PROVIDER = p -> new TestCuVSServiceProvider.TestGPUInfoProvider( + List.of() + ); + + @Override + protected Collection> nodePlugins() { + return List.of(GPUPlugin.class); + } + + public static class TestCuVSServiceProvider extends CuVSServiceProvider { + + static final Function BUILTIN_GPU_INFO_PROVIDER = CuVSProvider::gpuInfoProvider; + static Function mockedGPUInfoProvider = BUILTIN_GPU_INFO_PROVIDER; + + @Override + public CuVSProvider get(CuVSProvider builtin) { + return new CuVSProviderDelegate(builtin) { + @Override + public GPUInfoProvider gpuInfoProvider() { + return 
mockedGPUInfoProvider.apply(builtin); + } + }; + } + + private static class TestGPUInfoProvider implements GPUInfoProvider { + private final List gpuList; + + private TestGPUInfoProvider(List gpuList) { + this.gpuList = gpuList; + } + + @Override + public List availableGPUs() { + return gpuList; + } + + @Override + public List compatibleGPUs() { + return gpuList; + } + + @Override + public CuVSResourcesInfo getCurrentInfo(CuVSResources cuVSResources) { + return null; + } + } + } + + @After + public void disableMock() { + TestCuVSServiceProvider.mockedGPUInfoProvider = TestCuVSServiceProvider.BUILTIN_GPU_INFO_PROVIDER; + } + + public void testFFOff() { + assumeFalse("GPU_FORMAT feature flag disabled", GPUPlugin.GPU_FORMAT.isEnabled()); + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(null, null); + assertNull(format); + } + + public void testIndexSettingOnIndexTypeSupportedGPUSupported() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + final var metadata = newIndexMeta( + "index1", + Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.TRUE).build() + ); + final var settings = new IndexSettings(metadata, Settings.EMPTY); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNotNull(format); + } + + public void testIndexSettingOnIndexTypeNotSupportedThrows() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + 
TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + final var metadata = newIndexMeta( + "index1", + Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.TRUE).build() + ); + final var settings = new IndexSettings(metadata, Settings.EMPTY); + final var indexOptions = DenseVectorFieldTypeTests.randomFlatIndexOptions(); + + var ex = expectThrows(IllegalArgumentException.class, () -> vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions)); + assertThat(ex.getMessage(), startsWith("[index.vectors.indexing.use_gpu] doesn't support [index_options.type] of")); + } + + public void testIndexSettingOnGPUNotSupportedThrows() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = NO_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + final var metadata = newIndexMeta( + "index1", + Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.TRUE).build() + ); + final var settings = new IndexSettings(metadata, Settings.EMPTY); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var ex = expectThrows(IllegalArgumentException.class, () -> vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions)); + assertThat( + ex.getMessage(), + equalTo("[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node.") + ); + } + + public void testIndexSettingOnGPUSupportThrowsRethrows() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + // Mocks a cuvs-java UnsupportedProvider + 
TestCuVSServiceProvider.mockedGPUInfoProvider = p -> { throw new UnsupportedOperationException("cuvs-java UnsupportedProvider"); }; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + final var metadata = newIndexMeta( + "index1", + Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.TRUE).build() + ); + final var settings = new IndexSettings(metadata, Settings.EMPTY); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var ex = expectThrows(IllegalArgumentException.class, () -> vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions)); + assertThat( + ex.getMessage(), + equalTo("[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node.") + ); + } + + public void testIndexSettingAutoIndexTypeSupportedGPUSupported() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + final var metadata = newIndexMeta( + "index1", + Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.AUTO).build() + ); + final var settings = new IndexSettings(metadata, Settings.EMPTY); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNotNull(format); + } + + public void testIndexSettingAutoGPUNotSupported() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = NO_GPU_PROVIDER; + + GPUPlugin gpuPlugin = 
internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + final var metadata = newIndexMeta( + "index1", + Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.AUTO).build() + ); + final var settings = new IndexSettings(metadata, Settings.EMPTY); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNull(format); + } + + public void testIndexSettingAutoIndexTypeNotSupported() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + final var metadata = newIndexMeta( + "index1", + Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.AUTO).build() + ); + final var settings = new IndexSettings(metadata, Settings.EMPTY); + final var indexOptions = DenseVectorFieldTypeTests.randomFlatIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNull(format); + } + + public void testIndexSettingOff() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + final var metadata = newIndexMeta( + "index1", + Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.FALSE).build() + ); + final var settings = new IndexSettings(metadata, Settings.EMPTY); + final var 
indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNull(format); + } +} diff --git a/x-pack/plugin/gpu/src/internalClusterTest/resources/META-INF/services/com.nvidia.cuvs.spi.CuVSServiceProvider b/x-pack/plugin/gpu/src/internalClusterTest/resources/META-INF/services/com.nvidia.cuvs.spi.CuVSServiceProvider new file mode 100644 index 0000000000000..d5d524f49aa81 --- /dev/null +++ b/x-pack/plugin/gpu/src/internalClusterTest/resources/META-INF/services/com.nvidia.cuvs.spi.CuVSServiceProvider @@ -0,0 +1,8 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0; you may not use this file except in compliance with the Elastic License +# 2.0. +# + +org.elasticsearch.plugin.gpu.GPUPluginInitializationIT$TestCuVSServiceProvider diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java index f1ff6bcffd1d2..c21bda894790a 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java @@ -8,7 +8,10 @@ package org.elasticsearch.xpack.gpu; import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.GPUInfoProvider; +import com.nvidia.cuvs.spi.CuVSProvider; +import org.elasticsearch.common.Strings; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -16,11 +19,73 @@ public class GPUSupport { private static final Logger LOG = LogManager.getLogger(GPUSupport.class); + // Set the minimum at 7.5GB: 8GB GPUs (which are our targeted minimum) report less than that via the API + private static final long MIN_DEVICE_MEMORY_IN_BYTES = 8053063680L; + /** Tells whether the platform supports cuvs. 
*/ public static boolean isSupported(boolean logError) { - try (var resources = cuVSResourcesOrNull(logError)) { - if (resources != null) { - return true; + try { + var gpuInfoProvider = CuVSProvider.provider().gpuInfoProvider(); + var availableGPUs = gpuInfoProvider.availableGPUs(); + if (availableGPUs.isEmpty()) { + if (logError) { + LOG.warn("No GPU found"); + } + return false; + } + + for (var gpu : availableGPUs) { + if (gpu.computeCapabilityMajor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR + || (gpu.computeCapabilityMajor() == GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR + && gpu.computeCapabilityMinor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR)) { + if (logError) { + LOG.warn( + "GPU [{}] does not have the minimum compute capabilities (required: [{}.{}], found: [{}.{}])", + gpu.name(), + GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR, + GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR, + gpu.computeCapabilityMajor(), + gpu.computeCapabilityMinor() + ); + } + } else if (gpu.totalDeviceMemoryInBytes() < MIN_DEVICE_MEMORY_IN_BYTES) { + if (logError) { + LOG.warn( + "GPU [{}] does not have minimum memory required (required: [{}], found: [{}])", + gpu.name(), + MIN_DEVICE_MEMORY_IN_BYTES, + gpu.totalDeviceMemoryInBytes() + ); + } + } else { + if (logError) { + LOG.info("Found compatible GPU [{}] (id: [{}])", gpu.name(), gpu.gpuId()); + } + return true; + } + } + + } catch (UnsupportedOperationException uoe) { + if (logError) { + final String msg; + if (uoe.getMessage() == null) { + msg = Strings.format( + "runtime Java version [%d], OS [%s], arch [%s]", + Runtime.version().feature(), + System.getProperty("os.name"), + System.getProperty("os.arch") + ); + } else { + msg = uoe.getMessage(); + } + LOG.warn("GPU based vector indexing is not supported on this platform; " + msg); + } + } catch (Throwable t) { + if (logError) { + if (t instanceof ExceptionInInitializerError ex) { + t = ex.getCause(); + } + LOG.warn("Exception occurred during creation of cuvs 
resources", t); } } return false; diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java index 26e4e94ed57ea..a62585c562342 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java @@ -63,7 +63,7 @@ public interface CuVSResourceManager { /** Returns the system-wide pooling manager. */ static CuVSResourceManager pooling() { - return PoolingCuVSResourceManager.INSTANCE; + return PoolingCuVSResourceManager.Holder.INSTANCE; } /** @@ -76,10 +76,13 @@ class PoolingCuVSResourceManager implements CuVSResourceManager { /** A multiplier on input data to account for intermediate and output data size required while processing it */ static final double GPU_COMPUTATION_MEMORY_FACTOR = 2.0; static final int MAX_RESOURCES = 2; - static final PoolingCuVSResourceManager INSTANCE = new PoolingCuVSResourceManager( - MAX_RESOURCES, - CuVSProvider.provider().gpuInfoProvider() - ); + + static class Holder { + static final PoolingCuVSResourceManager INSTANCE = new PoolingCuVSResourceManager( + MAX_RESOURCES, + CuVSProvider.provider().gpuInfoProvider() + ); + } private final ManagedCuVSResources[] pool; private final int capacity; @@ -137,7 +140,7 @@ public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataTyp final boolean enoughMemory; if (res != null) { long requiredMemoryInBytes = estimateRequiredMemory(numVectors, dims, dataType); - logger.info( + logger.debug( "Estimated memory for [{}] vectors, [{}] dims of type [{}] is [{} B]", numVectors, dims, @@ -160,16 +163,16 @@ public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataTyp // If no resource in the pool is locked, short circuit to avoid livelock if (numLockedResources() == 0) { - logger.info("No resources 
currently locked, proceeding"); + logger.debug("No resources currently locked, proceeding"); break; } // Check resources availability long freeDeviceMemoryInBytes = gpuInfoProvider.getCurrentInfo(res).freeDeviceMemoryInBytes(); enoughMemory = requiredMemoryInBytes <= freeDeviceMemoryInBytes; - logger.info("Free device memory [{} B], enoughMemory[{}]", freeDeviceMemoryInBytes); + logger.debug("Free device memory [{} B], enoughMemory[{}]", freeDeviceMemoryInBytes, enoughMemory); } else { - logger.info("No resources available in pool"); + logger.debug("No resources available in pool"); enoughMemory = false; } // TODO: add enoughComputation / enoughComputationCondition here @@ -201,14 +204,14 @@ protected CuVSResources createNew() { @Override public void finishedComputation(ManagedCuVSResources resources) { - logger.info("Computation finished"); + logger.debug("Computation finished"); // currently does nothing, but could allow acquire to return possibly blocked resources // enoughResourcesCondition.signalAll() } @Override public void release(ManagedCuVSResources resources) { - logger.info("Releasing resources to pool"); + logger.debug("Releasing resources to pool"); try { lock.lock(); assert resources.locked; diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormat.java index 400a855db6d6b..c4ab7c75a7a76 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormat.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; import java.io.IOException; +import java.util.function.Supplier; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; import static 
org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; @@ -35,7 +36,7 @@ public class ESGpuHnswSQVectorsFormat extends KnnVectorsFormat { /** The format for storing, reading, merging vectors on disk */ private final FlatVectorsFormat flatVectorsFormat; - final CuVSResourceManager cuVSResourceManager; + private final Supplier cuVSResourceManagerSupplier; public ESGpuHnswSQVectorsFormat() { this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, null, 7, false); @@ -43,7 +44,7 @@ public ESGpuHnswSQVectorsFormat() { public ESGpuHnswSQVectorsFormat(int maxConn, int beamWidth, Float confidenceInterval, int bits, boolean compress) { super(NAME); - this.cuVSResourceManager = CuVSResourceManager.pooling(); + this.cuVSResourceManagerSupplier = CuVSResourceManager::pooling; if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) { throw new IllegalArgumentException( "maxConn must be positive and less than or equal to " + MAXIMUM_MAX_CONN + "; maxConn=" + maxConn @@ -61,7 +62,13 @@ public ESGpuHnswSQVectorsFormat(int maxConn, int beamWidth, Float confidenceInte @Override public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new ESGpuHnswVectorsWriter(cuVSResourceManager, state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state)); + return new ESGpuHnswVectorsWriter( + cuVSResourceManagerSupplier.get(), + state, + maxConn, + beamWidth, + flatVectorsFormat.fieldsWriter(state) + ); } @Override diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java index b06b452435c83..610904380164c 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java @@ -18,6 +18,7 @@ import org.apache.lucene.index.SegmentWriteState; import java.io.IOException; +import 
java.util.function.Supplier; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; @@ -47,26 +48,32 @@ public class ESGpuHnswVectorsFormat extends KnnVectorsFormat { private final int maxConn; // Intermediate graph degree, the number of connections for each node before pruning private final int beamWidth; - final CuVSResourceManager cuVSResourceManager; + private final Supplier cuVSResourceManagerSupplier; public ESGpuHnswVectorsFormat() { - this(CuVSResourceManager.pooling(), DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH); + this(CuVSResourceManager::pooling, DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH); } public ESGpuHnswVectorsFormat(int maxConn, int beamWidth) { - this(CuVSResourceManager.pooling(), maxConn, beamWidth); + this(CuVSResourceManager::pooling, maxConn, beamWidth); }; - public ESGpuHnswVectorsFormat(CuVSResourceManager cuVSResourceManager, int maxConn, int beamWidth) { + public ESGpuHnswVectorsFormat(Supplier cuVSResourceManagerSupplier, int maxConn, int beamWidth) { super(NAME); - this.cuVSResourceManager = cuVSResourceManager; + this.cuVSResourceManagerSupplier = cuVSResourceManagerSupplier; this.maxConn = maxConn; this.beamWidth = beamWidth; } @Override public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new ESGpuHnswVectorsWriter(cuVSResourceManager, state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state)); + return new ESGpuHnswVectorsWriter( + cuVSResourceManagerSupplier.get(), + state, + maxConn, + beamWidth, + flatVectorsFormat.fieldsWriter(state) + ); } @Override diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index ec27dd99bd673..49c4d9654f07a 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ 
b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -24,13 +24,11 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.MergeState; -import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.FilterIndexInput; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java index 8d639ccf58d32..d714c2b18618a 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java @@ -24,13 +24,14 @@ public class ESGpuHnswSQVectorsFormatTests extends BaseKnnVectorsFormatTestCase LogConfigurator.configureESLogging(); // native access requires logging to be initialized } + static Codec codec; + @BeforeClass public static void beforeClass() { assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); + codec = TestUtil.alwaysKnnVectorsFormat(new ESGpuHnswSQVectorsFormat()); } - static final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ESGpuHnswSQVectorsFormat()); - @Override protected Codec getCodec() { return codec; diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java index 77e758cda78c5..471c811dc3f2e 100644 
--- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java @@ -25,13 +25,14 @@ public class ESGpuHnswVectorsFormatTests extends BaseKnnVectorsFormatTestCase { LogConfigurator.configureESLogging(); // native access requires logging to be initialized } + static Codec codec; + @BeforeClass public static void beforeClass() { assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); + codec = TestUtil.alwaysKnnVectorsFormat(new ESGpuHnswVectorsFormat()); } - static final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ESGpuHnswVectorsFormat()); - @Override protected Codec getCodec() { return codec; diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java index bba622313e7ee..fbb20cbde1baa 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java @@ -18,7 +18,7 @@ import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xpack.gpu.GPUPlugin; import org.elasticsearch.xpack.gpu.GPUSupport; -import org.junit.Before; +import org.junit.BeforeClass; import java.io.IOException; import java.util.Collection; @@ -28,8 +28,8 @@ public class GPUDenseVectorFieldMapperTests extends DenseVectorFieldMapperTests { - @Before - public void setup() { + @BeforeClass + public static void setup() { assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); } diff --git a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java index 4bfaab9243d90..c4e7e936b0111 
100644 --- a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java +++ b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java @@ -11,18 +11,32 @@ import org.elasticsearch.test.cluster.ElasticsearchCluster; import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; +import org.junit.BeforeClass; import org.junit.ClassRule; public class GPUClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { + @BeforeClass + public static void setup() { + assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); + } + @ClassRule - public static ElasticsearchCluster cluster = ElasticsearchCluster.local() - .nodes(1) - .module("gpu") - .setting("xpack.license.self_generated.type", "trial") - .setting("xpack.security.enabled", "false") - .environment("LD_LIBRARY_PATH", System.getenv("LD_LIBRARY_PATH")) - .build(); + public static ElasticsearchCluster cluster = createCluster(); + + private static ElasticsearchCluster createCluster() { + var builder = ElasticsearchCluster.local() + .nodes(1) + .module("gpu") + .setting("xpack.license.self_generated.type", "trial") + .setting("xpack.security.enabled", "false"); + + var libraryPath = System.getenv("LD_LIBRARY_PATH"); + if (libraryPath != null) { + builder.environment("LD_LIBRARY_PATH", libraryPath); + } + return builder.build(); + } public GPUClientYamlTestSuiteIT(final ClientYamlTestCandidate testCandidate) { super(testCandidate); From 035ab73119ee71c769c62926c24b4dfbddc415b6 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Tue, 23 Sep 2025 04:31:49 +0000 Subject: [PATCH 085/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 75de90e35e663..c69a172a42172 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -d02e0fc1 +8fc4a2ad From b427ff1767c5c94f4dce745d97333fd668dd0634 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Wed, 24 Sep 2025 04:47:48 +0000 Subject: [PATCH 086/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index c69a172a42172..193a5e5289c72 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -8fc4a2ad +b431fda8 From 9eb25b3b29077f453dbf402f92ef63fdef5055e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?= Date: Wed, 24 Sep 2025 15:40:13 +0200 Subject: [PATCH 087/109] Use direct heap to GPU copy for raw vector data (#135210) Raw vector data needs to be copied to the GPU for building the CAGRA index. In #134563 we experimented with copying data directly from memory mapped files, and the current implementation uses a heap -> (compacted, native) memory segment -> GPU memory chain of copy operations. While copying from a contiguous chunk of memory to the GPU is faster (up to 3x), copying from sparse heap vectors to another (compacted, native) memory segment doubles the cost and the memory needed. Until we come up with a reliable way of using memory segments from Lucene directly (either pages of native memory or a "freshly flushed" memory mapped file), direct copy to GPU memory looks like the best compromise.
This PR changes the "flush" and "merge" operations of our codec to use that. (** notice that performance should further improve when we'll have rapidsai/cuvs#1332) --- server/src/main/java/module-info.java | 1 - .../reflect/VectorsFormatReflectionUtils.java | 188 ------------ .../elasticsearch/plugin/gpu/GPUIndexIT.java | 8 +- .../gpu/codec/ESGpuHnswVectorsWriter.java | 285 +++++++----------- 4 files changed, 111 insertions(+), 371 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 06ac7ab0a4bc0..f3881645f620f 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -490,5 +490,4 @@ exports org.elasticsearch.inference.telemetry; exports org.elasticsearch.index.codec.vectors.diskbbq to org.elasticsearch.test.knn; exports org.elasticsearch.index.codec.vectors.cluster to org.elasticsearch.test.knn; - exports org.elasticsearch.index.codec.vectors.reflect to org.elasticsearch.gpu; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java deleted file mode 100644 index 3ffd0a92b2ebc..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/VectorsFormatReflectionUtils.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.index.codec.vectors.reflect; - -import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; -import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; -import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter; -import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter; -import org.apache.lucene.index.Sorter; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.quantization.ScalarQuantizer; -import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; - -import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.MethodType; -import java.lang.invoke.VarHandle; - -public class VectorsFormatReflectionUtils { - - private static final VarHandle FLAT_VECTOR_DATA_HANDLE; - private static final VarHandle QUANTIZED_VECTOR_DATA_HANDLE; - private static final VarHandle RAW_DELEGATE_WRITER_HANDLE; - private static final VarHandle RAW_FIELD_DELEGATE_WRITER_HANDLE; - - private static final MethodHandle lucene99FlatVectorsWriter_writeField$mh; - private static final MethodHandle lucene99FlatVectorsWriter_writeSortingField$mh; - - private static final MethodHandle lucene99ScalarQuantizedVectorsWriter_writeField$mh; - private static final MethodHandle lucene99ScalarQuantizedVectorsWriter_writeSortingField$mh; - private static final MethodHandle lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer$mh; - - static final Class L99_SQ_VW_CLS = Lucene99ScalarQuantizedVectorsWriter.class; - static final Class L99_SQ_VW_FIELD_WRITER_CLS; - static final Class L99_F_VW_CLS = Lucene99FlatVectorsWriter.class; - static final Class L99_F_VW_FIELD_WRITER_CLS; - - static { - try { - L99_F_VW_FIELD_WRITER_CLS = Class.forName("org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter$FieldWriter"); - L99_SQ_VW_FIELD_WRITER_CLS = Class.forName( - "org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter$FieldWriter" - ); - var 
lookup = MethodHandles.privateLookupIn(L99_F_VW_CLS, MethodHandles.lookup()); - FLAT_VECTOR_DATA_HANDLE = lookup.findVarHandle(L99_F_VW_CLS, "vectorData", IndexOutput.class); - lucene99FlatVectorsWriter_writeField$mh = lookup.findVirtual( - L99_F_VW_CLS, - "writeField", - MethodType.methodType(void.class, L99_F_VW_FIELD_WRITER_CLS, int.class) - ); - - lucene99FlatVectorsWriter_writeSortingField$mh = lookup.findVirtual( - L99_F_VW_CLS, - "writeSortingField", - MethodType.methodType(void.class, L99_F_VW_FIELD_WRITER_CLS, int.class, Sorter.DocMap.class) - ); - - lookup = MethodHandles.privateLookupIn(L99_SQ_VW_CLS, MethodHandles.lookup()); - QUANTIZED_VECTOR_DATA_HANDLE = lookup.findVarHandle(L99_SQ_VW_CLS, "quantizedVectorData", IndexOutput.class); - RAW_DELEGATE_WRITER_HANDLE = lookup.findVarHandle(L99_SQ_VW_CLS, "rawVectorDelegate", FlatVectorsWriter.class); - lucene99ScalarQuantizedVectorsWriter_writeField$mh = lookup.findVirtual( - L99_SQ_VW_CLS, - "writeField", - MethodType.methodType(void.class, L99_SQ_VW_FIELD_WRITER_CLS, int.class, ScalarQuantizer.class) - ); - lucene99ScalarQuantizedVectorsWriter_writeSortingField$mh = lookup.findVirtual( - L99_SQ_VW_CLS, - "writeSortingField", - MethodType.methodType(void.class, L99_SQ_VW_FIELD_WRITER_CLS, int.class, Sorter.DocMap.class, ScalarQuantizer.class) - ); - RAW_FIELD_DELEGATE_WRITER_HANDLE = lookup.findVarHandle( - L99_SQ_VW_FIELD_WRITER_CLS, - "flatFieldVectorsWriter", - FlatFieldVectorsWriter.class - ); - lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer$mh = lookup.findVirtual( - L99_SQ_VW_FIELD_WRITER_CLS, - "createQuantizer", - MethodType.methodType(ScalarQuantizer.class) - ); - } catch (IllegalAccessException e) { - throw new AssertionError("should not happen, check opens", e); - } catch (ReflectiveOperationException e) { - throw new AssertionError(e); - } - } - - public static IndexOutput getQuantizedVectorDataIndexOutput(FlatVectorsWriter flatVectorWriter) { - assert flatVectorWriter 
instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter; - var rawVectorDelegate = getRawVectorDelegate( - (ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) flatVectorWriter - ); - return (IndexOutput) FLAT_VECTOR_DATA_HANDLE.get(rawVectorDelegate); - } - - public static IndexOutput getVectorDataIndexOutput(FlatVectorsWriter flatVectorWriter) { - assert flatVectorWriter instanceof Lucene99FlatVectorsWriter; - return (IndexOutput) FLAT_VECTOR_DATA_HANDLE.get(flatVectorWriter); - } - - // private void Lucene99FlatVectorsWriter#writeField(FieldWriter fieldData, int maxDoc) - public static void lucene99FlatVectorsWriter_writeField( - Lucene99FlatVectorsWriter that, - FlatFieldVectorsWriter fieldData, - int maxDoc - ) { - try { - lucene99FlatVectorsWriter_writeField$mh.invokeExact(that, fieldData, maxDoc); - } catch (Throwable e) { - throw new RuntimeException(e); - } - } - - // private void Lucene99FlatVectorsWriter#writeSortingField(FieldWriter fieldData, int maxDoc) - public static void lucene99FlatVectorsWriter_writeSortingField( - Lucene99FlatVectorsWriter that, - FlatFieldVectorsWriter fieldData, - int maxDoc, - Sorter.DocMap sortMap - ) { - try { - lucene99FlatVectorsWriter_writeSortingField$mh.invokeExact(that, fieldData, maxDoc, sortMap); - } catch (Throwable e) { - throw new RuntimeException(e); - } - } - - // private void Lucene99FlatVectorsWriter#writeField(FieldWriter fieldData, int maxDoc) - public static void lucene99ScalarQuantizedVectorsWriter_writeField( - Lucene99ScalarQuantizedVectorsWriter that, - FlatFieldVectorsWriter fieldData, - int maxDoc, - ScalarQuantizer scalarQuantizer - ) { - try { - lucene99ScalarQuantizedVectorsWriter_writeField$mh.invokeExact(that, fieldData, maxDoc, scalarQuantizer); - } catch (Throwable e) { - throw new RuntimeException(e); - } - } - - // private void Lucene99FlatVectorsWriter#writeSortingField(FieldWriter fieldData, int maxDoc) - public static void 
lucene99ScalarQuantizedVectorsWriter_writeSortingField( - Lucene99ScalarQuantizedVectorsWriter that, - FlatFieldVectorsWriter fieldData, - int maxDoc, - Sorter.DocMap sortMap, - ScalarQuantizer scalarQuantizer - ) { - try { - lucene99ScalarQuantizedVectorsWriter_writeSortingField$mh.invokeExact(that, fieldData, maxDoc, sortMap, scalarQuantizer); - } catch (Throwable e) { - throw new RuntimeException(e); - } - } - - public static ScalarQuantizer lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer(FlatFieldVectorsWriter fieldData) { - try { - return (ScalarQuantizer) lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer$mh.invokeExact(fieldData); - } catch (Throwable e) { - throw new RuntimeException(e); - } - } - - public static Lucene99FlatVectorsWriter getRawVectorDelegate( - ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter quantizedVectorsWriter - ) { - return (Lucene99FlatVectorsWriter) RAW_DELEGATE_WRITER_HANDLE.get(quantizedVectorsWriter.delegate); - } - - public static FlatFieldVectorsWriter getRawFieldVectorDelegate(FlatFieldVectorsWriter flatFieldVectorsWriter) { - if (L99_F_VW_FIELD_WRITER_CLS.isAssignableFrom(flatFieldVectorsWriter.getClass())) { - return flatFieldVectorsWriter; - } else { - assert L99_SQ_VW_FIELD_WRITER_CLS.isAssignableFrom(flatFieldVectorsWriter.getClass()); - @SuppressWarnings("unchecked") - var rawFieldVectorDelegate = (FlatFieldVectorsWriter) RAW_FIELD_DELEGATE_WRITER_HANDLE.get(flatFieldVectorsWriter); - return rawFieldVectorDelegate; - } - } -} diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java index 5d2ad832c657b..37542305cbf41 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java @@ -102,7 +102,7 
@@ public void testSortedIndexReturnsSameResultsAsUnsorted() { Assert.assertEquals(hits1.length, hits2.length); for (int i = 0; i < hits1.length; i++) { Assert.assertEquals(hits1[i].getId(), hits2[i].getId()); - Assert.assertEquals((String) hits1[i].field("my_keyword").getValue(), (String) hits2[i].field("my_keyword").getValue()); + Assert.assertEquals(hits1[i].field("my_keyword").getValue(), (String) hits2[i].field("my_keyword").getValue()); Assert.assertEquals(hits1[i].getScore(), hits2[i].getScore(), 0.001f); } } finally { @@ -133,7 +133,7 @@ public void testSortedIndexReturnsSameResultsAsUnsorted() { Assert.assertEquals(hits3.length, hits4.length); for (int i = 0; i < hits3.length; i++) { Assert.assertEquals(hits3[i].getId(), hits4[i].getId()); - Assert.assertEquals((String) hits3[i].field("my_keyword").getValue(), (String) hits4[i].field("my_keyword").getValue()); + Assert.assertEquals(hits3[i].field("my_keyword").getValue(), (String) hits4[i].field("my_keyword").getValue()); Assert.assertEquals(hits3[i].getScore(), hits4[i].getScore(), 0.01f); } } finally { @@ -210,9 +210,7 @@ private void assertSearch(String indexName, float[] queryVector, int totalDocs) .setFetchSource(false) .addFetchField("my_keyword") .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))), - response -> { - assertEquals("Expected k hits to be returned", k, response.getHits().getHits().length); - } + response -> assertEquals("Expected k hits to be returned", k, response.getHits().getHits().length) ); } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java index 49c4d9654f07a..6edea5a6d2d5d 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ 
b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java @@ -42,7 +42,6 @@ import org.elasticsearch.core.IOUtils; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; -import org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -51,21 +50,12 @@ import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Objects; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter.mergeAndRecalculateQuantiles; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; -import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.getRawFieldVectorDelegate; -import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.getRawVectorDelegate; -import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.lucene99FlatVectorsWriter_writeField; -import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.lucene99FlatVectorsWriter_writeSortingField; -import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer; -import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.lucene99ScalarQuantizedVectorsWriter_writeField; -import static org.elasticsearch.index.codec.vectors.reflect.VectorsFormatReflectionUtils.lucene99ScalarQuantizedVectorsWriter_writeSortingField; import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_META_CODEC_NAME; import static 
org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_META_EXTENSION; import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME; @@ -74,7 +64,7 @@ import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.MIN_NUM_VECTORS_FOR_GPU_BUILD; /** - * Writer that builds a Nvidia Carga Graph on GPU and than writes it into the Lucene99 HNSW format, + * Writer that builds an Nvidia Carga Graph on GPU and then writes it into the Lucene99 HNSW format, * so that it can be searched on CPU with Lucene99HNSWVectorReader. */ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { @@ -85,7 +75,6 @@ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { private final CuVSResourceManager cuVSResourceManager; private final SegmentWriteState segmentWriteState; private final IndexOutput meta, vectorIndex; - private final IndexOutput vectorData; private final int M; private final int beamWidth; private final FlatVectorsWriter flatVectorWriter; @@ -107,11 +96,9 @@ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { this.beamWidth = beamWidth; this.flatVectorWriter = flatVectorWriter; if (flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) { - vectorData = VectorsFormatReflectionUtils.getQuantizedVectorDataIndexOutput(flatVectorWriter); dataType = CuVSMatrix.DataType.BYTE; } else { assert flatVectorWriter instanceof Lucene99FlatVectorsWriter; - vectorData = VectorsFormatReflectionUtils.getVectorDataIndexOutput(flatVectorWriter); dataType = CuVSMatrix.DataType.FLOAT; } this.segmentWriteState = state; @@ -161,8 +148,6 @@ public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException return newField; } - private record FieldEntry(long vectorDataOffset, long vectorDataLength) {} - /** * Flushes vector data and associated data to disk. *

@@ -176,135 +161,72 @@ private record FieldEntry(long vectorDataOffset, long vectorDataLength) {} */ @Override public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { - // var mappedFields = new HashMap(); - // flatVectorWriterFlush(maxDoc, sortMap, mappedFields); flatVectorWriter.flush(maxDoc, sortMap); - flushFieldsWithoutMemoryMappedFile(sortMap); - } - - private void flatVectorWriterFlush(int maxDoc, Sorter.DocMap sortMap, HashMap mappedFields) throws IOException { - if (flatVectorWriter instanceof Lucene99FlatVectorsWriter lucene99FlatVectorsWriter) { - flushLucene99FlatVectorsWriter(lucene99FlatVectorsWriter, maxDoc, sortMap, mappedFields); - } else { - assert flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter; - var quantizedVectorsWriter = (ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) flatVectorWriter; - Lucene99FlatVectorsWriter rawVectorDelegate = getRawVectorDelegate(quantizedVectorsWriter); - flushLucene99FlatVectorsWriter(rawVectorDelegate, maxDoc, sortMap, mappedFields); - flushLucene99ScalarQuantizedVectorsWriter(quantizedVectorsWriter, maxDoc, sortMap); - } - } - - private void flushLucene99ScalarQuantizedVectorsWriter( - ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter quantizedVectorsWriter, - int maxDoc, - Sorter.DocMap sortMap - ) throws IOException { - for (var field : fields) { - ScalarQuantizer quantizer = lucene99ScalarQuantizedVectorsWriter_FieldWriter_createQuantizer(field.flatFieldVectorsWriter); - if (sortMap == null) { - lucene99ScalarQuantizedVectorsWriter_writeField( - quantizedVectorsWriter.delegate, - field.flatFieldVectorsWriter, - maxDoc, - quantizer - ); - } else { - lucene99ScalarQuantizedVectorsWriter_writeSortingField( - quantizedVectorsWriter.delegate, - field.flatFieldVectorsWriter, - maxDoc, - sortMap, - quantizer - ); - } - field.flatFieldVectorsWriter.finish(); - } - } - - private void flushLucene99FlatVectorsWriter( 
- Lucene99FlatVectorsWriter lucene99FlatVectorsWriter, - int maxDoc, - Sorter.DocMap sortMap, - Map mappedFields - ) throws IOException { - for (var field : fields) { - FlatFieldVectorsWriter flatFieldVectorsWriter = getRawFieldVectorDelegate(field.flatFieldVectorsWriter); - - long vectorDataOffset = vectorData.alignFilePointer(Float.BYTES); - long vectorDataLength = (long) field.fieldInfo.getVectorDimension() * Float.BYTES * flatFieldVectorsWriter.getVectors().size(); - mappedFields.put(field.fieldInfo.number, new FieldEntry(vectorDataOffset, vectorDataLength)); - - if (sortMap == null) { - lucene99FlatVectorsWriter_writeField(lucene99FlatVectorsWriter, flatFieldVectorsWriter, maxDoc); - } else { - lucene99FlatVectorsWriter_writeSortingField(lucene99FlatVectorsWriter, flatFieldVectorsWriter, maxDoc, sortMap); - } - flatFieldVectorsWriter.finish(); + try { + flushFieldsWithoutMemoryMappedFile(sortMap); + } catch (Throwable t) { + throw new IOException("Failed to flush GPU index: ", t); } } - private void flushFieldsWithMemoryMappedFile( - Sorter.DocMap sortMap, - MemorySegmentAccessInput memorySegmentAccessInput, - HashMap mappedFields - ) throws IOException { + private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IOException, InterruptedException { + // No tmp file written, or the file cannot be mmapped for (FieldWriter field : fields) { var fieldInfo = field.fieldInfo; var numVectors = field.flatFieldVectorsWriter.getVectors().size(); if (numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD) { + if (logger.isDebugEnabled()) { + logger.debug( + "Skip building carga index; vectors length {} < {} (min for GPU)", + numVectors, + MIN_NUM_VECTORS_FOR_GPU_BUILD + ); + } // Will not be indexed on the GPU - assert mappedFields.containsKey(fieldInfo.number) == false; - flushField(fieldInfo, null, numVectors, sortMap); + flushFieldWithMockGraph(fieldInfo, numVectors, sortMap); } else { - var fieldEntry = mappedFields.get(fieldInfo.number); - assert fieldEntry 
!= null; - - flushField( - fieldInfo, - DatasetUtils.getInstance() - .fromSlice( - memorySegmentAccessInput, - fieldEntry.vectorDataOffset, - fieldEntry.vectorDataLength, - numVectors, - fieldInfo.getVectorDimension(), - CuVSMatrix.DataType.FLOAT - ), - numVectors, - sortMap - ); + var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT); + try { + var builder = CuVSMatrix.deviceBuilder( + cuVSResources, + numVectors, + fieldInfo.getVectorDimension(), + CuVSMatrix.DataType.FLOAT + ); + for (var vector : field.flatFieldVectorsWriter.getVectors()) { + builder.addVector(vector); + } + try (var dataset = builder.build()) { + flushFieldWithGpuGraph(cuVSResources, fieldInfo, dataset, sortMap); + } + } finally { + cuVSResourceManager.release(cuVSResources); + } } } } - private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IOException { - // No tmp file written, or the file cannot be mmapped - for (FieldWriter field : fields) { - var fieldInfo = field.fieldInfo; - - var numVectors = field.flatFieldVectorsWriter.getVectors().size(); - if (numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD) { - // Will not be indexed on the GPU - flushField(fieldInfo, null, numVectors, sortMap); - } else { - var builder = CuVSMatrix.hostBuilder(numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT); - for (var vector : field.flatFieldVectorsWriter.getVectors()) { - builder.addVector(vector); - } - try (var dataset = builder.build()) { - flushField(fieldInfo, dataset, numVectors, sortMap); - } - } + private void flushFieldWithMockGraph(FieldInfo fieldInfo, int numVectors, Sorter.DocMap sortMap) throws IOException { + if (sortMap == null) { + generateMockGraphAndWriteMeta(fieldInfo, numVectors); + } else { + // TODO: use sortMap + generateMockGraphAndWriteMeta(fieldInfo, numVectors); } } - private void flushField(FieldInfo fieldInfo, CuVSMatrix dataset, int numVectors, Sorter.DocMap sortMap) throws 
IOException { + private void flushFieldWithGpuGraph( + CuVSResourceManager.ManagedCuVSResources resources, + FieldInfo fieldInfo, + CuVSMatrix dataset, + Sorter.DocMap sortMap + ) throws IOException { if (sortMap == null) { - writeFieldInternal(fieldInfo, dataset, numVectors); + generateGpuGraphAndWriteMeta(resources, fieldInfo, dataset); } else { // TODO: use sortMap - writeFieldInternal(fieldInfo, dataset, numVectors); + generateGpuGraphAndWriteMeta(resources, fieldInfo, dataset); } } @@ -336,32 +258,36 @@ public long ramBytesUsed() { return total; } - private void writeFieldInternal(FieldInfo fieldInfo, CuVSMatrix dataset, int datasetSize) throws IOException { + private void generateGpuGraphAndWriteMeta( + CuVSResourceManager.ManagedCuVSResources cuVSResources, + FieldInfo fieldInfo, + CuVSMatrix dataset + ) throws IOException { try { + assert dataset.size() >= MIN_NUM_VECTORS_FOR_GPU_BUILD; + long vectorIndexOffset = vectorIndex.getFilePointer(); int[][] graphLevelNodeOffsets = new int[1][]; final HnswGraph graph; - if (dataset == null) { - if (logger.isDebugEnabled()) { - logger.debug( - "Skip building carga index; vectors length {} < {} (min for GPU)", - datasetSize, - MIN_NUM_VECTORS_FOR_GPU_BUILD - ); - } - graph = writeMockGraph(datasetSize, graphLevelNodeOffsets); - } else { - var cuVSResources = cuVSResourceManager.acquire((int) dataset.size(), (int) dataset.columns(), dataset.dataType()); - try { - try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) { - assert index != null : "GPU index should be built for field: " + fieldInfo.name; - graph = writeGraph(index.getGraph(), graphLevelNodeOffsets); - } - } finally { - cuVSResourceManager.release(cuVSResources); - } + try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) { + assert index != null : "GPU index should be built for field: " + fieldInfo.name; + graph = writeGraph(index.getGraph(), graphLevelNodeOffsets); } 
long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; + writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, (int) dataset.size(), graph, graphLevelNodeOffsets); + } catch (IOException e) { + throw e; + } catch (Throwable t) { + throw new IOException("Failed to write GPU index: ", t); + } + } + + private void generateMockGraphAndWriteMeta(FieldInfo fieldInfo, int datasetSize) throws IOException { + try { + long vectorIndexOffset = vectorIndex.getFilePointer(); + int[][] graphLevelNodeOffsets = new int[1][]; + final HnswGraph graph = writeMockGraph(datasetSize, graphLevelNodeOffsets); + long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetSize, graph, graphLevelNodeOffsets); } catch (IOException e) { throw e; @@ -542,50 +468,60 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { var input = FilterIndexInput.unwrapOnlyTest(in); - final CuVSMatrix dataset; if (numVectors >= MIN_NUM_VECTORS_FOR_GPU_BUILD) { if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { // Direct access to mmapped file - dataset = DatasetUtils.getInstance() + final var dataset = DatasetUtils.getInstance() .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), dataType); + + var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType); + try { + generateGpuGraphAndWriteMeta(cuVSResources, fieldInfo, dataset); + } finally { + dataset.close(); + cuVSResourceManager.release(cuVSResources); + } } else { logger.debug( () -> "Cannot mmap merged raw vectors temporary file. 
IndexInput type [" + input.getClass().getSimpleName() + "]" ); - // Read vector-by-vector - var builder = CuVSMatrix.hostBuilder(numVectors, fieldInfo.getVectorDimension(), dataType); - - // During merging, we use quantized data, so we need to support byte[] too. - // That's how our current formats work: use floats during indexing, and quantized data to build a graph during merging. - if (dataType == CuVSMatrix.DataType.FLOAT) { - float[] vector = new float[fieldInfo.getVectorDimension()]; - for (int i = 0; i < numVectors; ++i) { - input.readFloats(vector, 0, fieldInfo.getVectorDimension()); - builder.addVector(vector); + var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType); + try { + // Read vector-by-vector + var builder = CuVSMatrix.deviceBuilder(cuVSResources, numVectors, fieldInfo.getVectorDimension(), dataType); + + // During merging, we use quantized data, so we need to support byte[] too. + // That's how our current formats work: use floats during indexing, and quantized data to build a graph + // during merging. 
+ if (dataType == CuVSMatrix.DataType.FLOAT) { + float[] vector = new float[fieldInfo.getVectorDimension()]; + for (int i = 0; i < numVectors; ++i) { + input.readFloats(vector, 0, fieldInfo.getVectorDimension()); + builder.addVector(vector); + } + } else { + assert dataType == CuVSMatrix.DataType.BYTE; + byte[] vector = new byte[fieldInfo.getVectorDimension()]; + for (int i = 0; i < numVectors; ++i) { + input.readBytes(vector, 0, fieldInfo.getVectorDimension()); + builder.addVector(vector); + } } - } else { - assert dataType == CuVSMatrix.DataType.BYTE; - byte[] vector = new byte[fieldInfo.getVectorDimension()]; - for (int i = 0; i < numVectors; ++i) { - input.readBytes(vector, 0, fieldInfo.getVectorDimension()); - builder.addVector(vector); + try (var dataset = builder.build()) { + generateGpuGraphAndWriteMeta(cuVSResources, fieldInfo, dataset); } + } finally { + cuVSResourceManager.release(cuVSResources); } - dataset = builder.build(); } } else { // we don't really need real value for vectors here, // we just build a mock graph where every node is connected to every other node - dataset = null; - } - try { - writeFieldInternal(fieldInfo, dataset, numVectors); - } finally { - if (dataset != null) { - dataset.close(); - } + generateMockGraphAndWriteMeta(fieldInfo, numVectors); } + } catch (Throwable t) { + throw new IOException("Failed to merge GPU index: ", t); } finally { deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); } @@ -596,12 +532,7 @@ private ByteVectorValues getMergedByteVectorValues(FieldInfo fieldInfo, MergeSta final byte bits = 7; final Float confidenceInterval = null; ScalarQuantizer quantizer = mergeAndRecalculateQuantiles(mergeState, fieldInfo, confidenceInterval, bits); - MergedQuantizedVectorValues byteVectorValues = MergedQuantizedVectorValues.mergeQuantizedByteVectorValues( - fieldInfo, - mergeState, - quantizer - ); - return byteVectorValues; + return 
MergedQuantizedVectorValues.mergeQuantizedByteVectorValues(fieldInfo, mergeState, quantizer); } private static int writeByteVectorValues(IndexOutput out, ByteVectorValues vectorValues) throws IOException { From 67b0fbcdbec83c44459d986dc8d53734844a3294 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Thu, 25 Sep 2025 04:32:04 +0000 Subject: [PATCH 088/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 193a5e5289c72..2ec0bb2cd66b2 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -b431fda8 +12ebfa1e From 6890e3ae6ca73b38e6169db9b4fcd98282e2ee51 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 25 Sep 2025 13:07:28 -0400 Subject: [PATCH 089/109] Remove 24 launcher --- qa/vector/build.gradle | 3 --- 1 file changed, 3 deletions(-) diff --git a/qa/vector/build.gradle b/qa/vector/build.gradle index ddef89566bbf4..263312fa6688e 100644 --- a/qa/vector/build.gradle +++ b/qa/vector/build.gradle @@ -48,9 +48,6 @@ tasks.register("checkVec", JavaExec) { systemProperty "es.logger.out", "console" systemProperty "es.logger.level", "INFO" // Change to DEBUG if needed systemProperty 'es.nativelibs.path', TestUtil.getTestLibraryPath(file("../../libs/native/libraries/build/platform/").toString()) - javaLauncher = project.javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(24) - } jvmArgs '-Xms4g', '-Xmx4g', '-Djava.util.concurrent.ForkJoinPool.common.parallelism=8', '-XX:+UnlockDiagnosticVMOptions', '-XX:+DebugNonSafepoints', '-XX:+HeapDumpOnOutOfMemoryError' if (buildParams.getRuntimeJavaVersion().map { 
it.majorVersion.toInteger() }.get() >= 21) { jvmArgs '--add-modules=jdk.incubator.vector', '--enable-native-access=ALL-UNNAMED' From 2b0007a7a339529afd86924f8a9ab2bd67ea9064 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Fri, 26 Sep 2025 04:37:27 +0000 Subject: [PATCH 090/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 2ec0bb2cd66b2..8eea338605f26 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -12ebfa1e +e32b14db From 45708de4367c6b6dc996982dd6e7598aa17d69ac Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 26 Sep 2025 10:26:30 -0400 Subject: [PATCH 091/109] Rename GPU format to ES92 --- .../test/knn/KnnIndexTester.java | 8 +++---- .../elasticsearch/plugin/gpu/GPUIndexIT.java | 1 + .../plugin/gpu/src/main/java/module-info.java | 8 +++---- .../elasticsearch/xpack/gpu/GPUPlugin.java | 12 +++++----- .../xpack/gpu/codec/CuVSResourceManager.java | 2 +- ...t.java => ES92GpuHnswSQVectorsFormat.java} | 14 +++++------ ...mat.java => ES92GpuHnswVectorsFormat.java} | 12 +++++----- ...ter.java => ES92GpuHnswVectorsWriter.java} | 24 ++++++++++--------- .../org.apache.lucene.codecs.KnnVectorsFormat | 4 ++-- .../codec/ESGpuHnswSQVectorsFormatTests.java | 2 +- .../codec/ESGpuHnswVectorsFormatTests.java | 2 +- .../codec/GPUDenseVectorFieldMapperTests.java | 4 ++-- 12 files changed, 48 insertions(+), 45 deletions(-) rename x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/{ESGpuHnswSQVectorsFormat.java => ES92GpuHnswSQVectorsFormat.java} (85%) rename 
x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/{ESGpuHnswVectorsFormat.java => ES92GpuHnswVectorsFormat.java} (89%) rename x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/{ESGpuHnswVectorsWriter.java => ES92GpuHnswVectorsWriter.java} (96%) diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java index 76ddd3d67c5b7..9e4dca46f0c18 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java @@ -39,8 +39,8 @@ import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.XContentType; -import org.elasticsearch.xpack.gpu.codec.ESGpuHnswSQVectorsFormat; -import org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat; +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat; +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat; import java.io.IOException; import java.io.InputStream; @@ -114,9 +114,9 @@ static Codec createCodec(CmdLineArgs args) { format = new ES920DiskBBQVectorsFormat(args.ivfClusterSize(), ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER); } else if (args.indexType() == IndexType.GPU_HNSW) { if (args.quantizeBits() == 32) { - format = new ESGpuHnswVectorsFormat(); + format = new ES92GpuHnswVectorsFormat(); } else if (args.quantizeBits() == 7) { - format = new ESGpuHnswSQVectorsFormat(); + format = new ES92GpuHnswSQVectorsFormat(); } else { throw new IllegalArgumentException( "GPU HNSW index type only supports 7 or 32 bits quantization, but got: " + args.quantizeBits() diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java index 37542305cbf41..b00d8d83143a9 
100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java @@ -55,6 +55,7 @@ public void testBasic() { assertSearch(indexName, randomFloatVector(dims), totalDocs); } + @AwaitsFix(bugUrl = "Fix sorted index") public void testSortedIndexReturnsSameResultsAsUnsorted() { String indexName1 = "index_unsorted"; String indexName2 = "index_sorted"; diff --git a/x-pack/plugin/gpu/src/main/java/module-info.java b/x-pack/plugin/gpu/src/main/java/module-info.java index a5cc05edc6c92..dcada289c1376 100644 --- a/x-pack/plugin/gpu/src/main/java/module-info.java +++ b/x-pack/plugin/gpu/src/main/java/module-info.java @@ -5,6 +5,9 @@ * 2.0. */ +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat; +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat; + /** Provides GPU-accelerated support for vector indexing. */ module org.elasticsearch.gpu { requires org.elasticsearch.logging; @@ -17,8 +20,5 @@ exports org.elasticsearch.xpack.gpu.codec; provides org.elasticsearch.features.FeatureSpecification with org.elasticsearch.xpack.gpu.GPUFeatures; - provides org.apache.lucene.codecs.KnnVectorsFormat - with - org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat, - org.elasticsearch.xpack.gpu.codec.ESGpuHnswSQVectorsFormat; + provides org.apache.lucene.codecs.KnnVectorsFormat with ES92GpuHnswVectorsFormat, ES92GpuHnswSQVectorsFormat; } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index cd469b2d25828..67428851c2cb8 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -14,8 +14,8 @@ import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import 
org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.xpack.gpu.codec.ESGpuHnswSQVectorsFormat; -import org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat; +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat; +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat; public class GPUPlugin extends Plugin implements MapperPlugin { @@ -58,16 +58,16 @@ private static KnnVectorsFormat getVectorsFormat(DenseVectorFieldMapper.DenseVec DenseVectorFieldMapper.HnswIndexOptions hnswIndexOptions = (DenseVectorFieldMapper.HnswIndexOptions) indexOptions; int efConstruction = hnswIndexOptions.efConstruction(); if (efConstruction == HnswGraphBuilder.DEFAULT_BEAM_WIDTH) { - efConstruction = ESGpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128 + efConstruction = ES92GpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128 } - return new ESGpuHnswVectorsFormat(hnswIndexOptions.m(), efConstruction); + return new ES92GpuHnswVectorsFormat(hnswIndexOptions.m(), efConstruction); } else if (indexOptions.getType() == DenseVectorFieldMapper.VectorIndexType.INT8_HNSW) { DenseVectorFieldMapper.Int8HnswIndexOptions int8HnswIndexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) indexOptions; int efConstruction = int8HnswIndexOptions.efConstruction(); if (efConstruction == HnswGraphBuilder.DEFAULT_BEAM_WIDTH) { - efConstruction = ESGpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128 + efConstruction = ES92GpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128 } - return new ESGpuHnswSQVectorsFormat( + return new ES92GpuHnswSQVectorsFormat( int8HnswIndexOptions.m(), efConstruction, int8HnswIndexOptions.confidenceInterval(), diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java 
b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java index a62585c562342..44240a848268b 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java @@ -75,7 +75,7 @@ class PoolingCuVSResourceManager implements CuVSResourceManager { /** A multiplier on input data to account for intermediate and output data size required while processing it */ static final double GPU_COMPUTATION_MEMORY_FACTOR = 2.0; - static final int MAX_RESOURCES = 2; + static final int MAX_RESOURCES = 4; static class Holder { static final PoolingCuVSResourceManager INSTANCE = new PoolingCuVSResourceManager( diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java similarity index 85% rename from x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormat.java rename to x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java index c4ab7c75a7a76..60c781afa135b 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java @@ -20,15 +20,15 @@ import java.util.function.Supplier; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; -import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; -import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.DEFAULT_MAX_CONN; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.DEFAULT_MAX_CONN; /** * Codec format for 
GPU-accelerated scalar quantized HNSW vector indexes. * HNSW graph is built on GPU, while scalar quantization and search is performed on CPU. */ -public class ESGpuHnswSQVectorsFormat extends KnnVectorsFormat { - public static final String NAME = "ESGPUHnswScalarQuantizedVectorsFormat"; +public class ES92GpuHnswSQVectorsFormat extends KnnVectorsFormat { + public static final String NAME = "ES92GPUHnswScalarQuantizedVectorsFormat"; static final int MAXIMUM_MAX_CONN = 512; static final int MAXIMUM_BEAM_WIDTH = 3200; private final int maxConn; @@ -38,11 +38,11 @@ public class ESGpuHnswSQVectorsFormat extends KnnVectorsFormat { private final FlatVectorsFormat flatVectorsFormat; private final Supplier cuVSResourceManagerSupplier; - public ESGpuHnswSQVectorsFormat() { + public ES92GpuHnswSQVectorsFormat() { this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, null, 7, false); } - public ESGpuHnswSQVectorsFormat(int maxConn, int beamWidth, Float confidenceInterval, int bits, boolean compress) { + public ES92GpuHnswSQVectorsFormat(int maxConn, int beamWidth, Float confidenceInterval, int bits, boolean compress) { super(NAME); this.cuVSResourceManagerSupplier = CuVSResourceManager::pooling; if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) { @@ -62,7 +62,7 @@ public ESGpuHnswSQVectorsFormat(int maxConn, int beamWidth, Float confidenceInte @Override public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new ESGpuHnswVectorsWriter( + return new ES92GpuHnswVectorsWriter( cuVSResourceManagerSupplier.get(), state, maxConn, diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java similarity index 89% rename from x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java rename to x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java 
index 610904380164c..fe7a70f451b80 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java @@ -26,8 +26,8 @@ * Codec format for GPU-accelerated vector indexes. This format is designed to * leverage GPU processing capabilities for vector search operations. */ -public class ESGpuHnswVectorsFormat extends KnnVectorsFormat { - public static final String NAME = "ESGpuHnswVectorsFormat"; +public class ES92GpuHnswVectorsFormat extends KnnVectorsFormat { + public static final String NAME = "ES92GpuHnswVectorsFormat"; public static final int VERSION_START = 0; static final String LUCENE99_HNSW_META_CODEC_NAME = "Lucene99HnswVectorsFormatMeta"; @@ -50,15 +50,15 @@ public class ESGpuHnswVectorsFormat extends KnnVectorsFormat { private final int beamWidth; private final Supplier cuVSResourceManagerSupplier; - public ESGpuHnswVectorsFormat() { + public ES92GpuHnswVectorsFormat() { this(CuVSResourceManager::pooling, DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH); } - public ESGpuHnswVectorsFormat(int maxConn, int beamWidth) { + public ES92GpuHnswVectorsFormat(int maxConn, int beamWidth) { this(CuVSResourceManager::pooling, maxConn, beamWidth); }; - public ESGpuHnswVectorsFormat(Supplier cuVSResourceManagerSupplier, int maxConn, int beamWidth) { + public ES92GpuHnswVectorsFormat(Supplier cuVSResourceManagerSupplier, int maxConn, int beamWidth) { super(NAME); this.cuVSResourceManagerSupplier = cuVSResourceManagerSupplier; this.maxConn = maxConn; @@ -67,7 +67,7 @@ public ESGpuHnswVectorsFormat(Supplier cuVSResourceManagerS @Override public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new ESGpuHnswVectorsWriter( + return new ES92GpuHnswVectorsWriter( cuVSResourceManagerSupplier.get(), state, maxConn, diff --git 
a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java similarity index 96% rename from x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java rename to x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java index 6edea5a6d2d5d..a36254e910721 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java @@ -56,20 +56,20 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter.mergeAndRecalculateQuantiles; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; -import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_META_CODEC_NAME; -import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_META_EXTENSION; -import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME; -import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_EXTENSION; -import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.LUCENE99_VERSION_CURRENT; -import static org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat.MIN_NUM_VECTORS_FOR_GPU_BUILD; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_META_CODEC_NAME; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_META_EXTENSION; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME; +import static 
org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_EXTENSION; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_VERSION_CURRENT; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.MIN_NUM_VECTORS_FOR_GPU_BUILD; /** * Writer that builds an Nvidia Carga Graph on GPU and then writes it into the Lucene99 HNSW format, * so that it can be searched on CPU with Lucene99HNSWVectorReader. */ -final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { - private static final Logger logger = LogManager.getLogger(ESGpuHnswVectorsWriter.class); - private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ESGpuHnswVectorsWriter.class); +final class ES92GpuHnswVectorsWriter extends KnnVectorsWriter { + private static final Logger logger = LogManager.getLogger(ES92GpuHnswVectorsWriter.class); + private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ES92GpuHnswVectorsWriter.class); private static final int LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT = 16; private final CuVSResourceManager cuVSResourceManager; @@ -83,7 +83,7 @@ final class ESGpuHnswVectorsWriter extends KnnVectorsWriter { private boolean finished; private final CuVSMatrix.DataType dataType; - ESGpuHnswVectorsWriter( + ES92GpuHnswVectorsWriter( CuVSResourceManager cuVSResourceManager, SegmentWriteState state, int M, @@ -154,12 +154,13 @@ public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException * This method and the private helpers it calls only need to support FLOAT32. * For FlatFieldVectorWriter we only need to support float[] during flush: during indexing users provide floats[], and pass floats to * FlatFieldVectorWriter, even when we have a BYTE dataType (i.e. an "int8_hnsw" type). 
- * During merging, we use quantized data, so we need to support byte[] too (see {@link ESGpuHnswVectorsWriter#mergeOneField}), + * During merging, we use quantized data, so we need to support byte[] too (see {@link ES92GpuHnswVectorsWriter#mergeOneField}), * but not here. * That's how our other current formats work: use floats during indexing, and quantized data to build graph during merging. *

*/ @Override + // TODO: fix sorted index case public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { flatVectorWriter.flush(maxDoc, sortMap); try { @@ -445,6 +446,7 @@ private static void deleteFilesIgnoringExceptions(Directory dir, String fileName // TODO check with deleted documents @Override + // fix sorted index case public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { flatVectorWriter.mergeOneField(fieldInfo, mergeState); final int numVectors; diff --git a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat index 5065e1674b351..7aa308150b6de 100644 --- a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat +++ b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -1,3 +1,3 @@ -org.elasticsearch.xpack.gpu.codec.ESGpuHnswVectorsFormat -org.elasticsearch.xpack.gpu.codec.ESGpuHnswSQVectorsFormat +org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat +org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java index d714c2b18618a..c7547c1697cc1 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java @@ -29,7 +29,7 @@ public class ESGpuHnswSQVectorsFormatTests extends BaseKnnVectorsFormatTestCase @BeforeClass public static void beforeClass() { assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); - codec = TestUtil.alwaysKnnVectorsFormat(new ESGpuHnswSQVectorsFormat()); + codec = 
TestUtil.alwaysKnnVectorsFormat(new ES92GpuHnswSQVectorsFormat()); } @Override diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java index 471c811dc3f2e..7436e13f8acc9 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java @@ -30,7 +30,7 @@ public class ESGpuHnswVectorsFormatTests extends BaseKnnVectorsFormatTestCase { @BeforeClass public static void beforeClass() { assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); - codec = TestUtil.alwaysKnnVectorsFormat(new ESGpuHnswVectorsFormat()); + codec = TestUtil.alwaysKnnVectorsFormat(new ES92GpuHnswVectorsFormat()); } @Override diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java index fbb20cbde1baa..45903b1786872 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java @@ -43,7 +43,7 @@ protected Collection getPlugins() { public void testKnnVectorsFormat() throws IOException { // TODO improve test with custom parameters KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("hnsw"); - String expectedStr = "ESGpuHnswVectorsFormat(name=ESGpuHnswVectorsFormat, " + String expectedStr = "ES92GpuHnswVectorsFormat(name=ES92GpuHnswVectorsFormat, " + "maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)"; assertEquals(expectedStr, knnVectorsFormat.toString()); } @@ -52,7 +52,7 @@ public void testKnnVectorsFormat() throws IOException { public void 
testKnnQuantizedHNSWVectorsFormat() throws IOException { // TOD improve the test with custom parameters KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("int8_hnsw"); - String expectedStr = "ESGPUHnswScalarQuantizedVectorsFormat(name=ESGPUHnswScalarQuantizedVectorsFormat, " + String expectedStr = "ES92GPUHnswScalarQuantizedVectorsFormat(name=ES92GPUHnswScalarQuantizedVectorsFormat, " + "maxConn=16, beamWidth=128, flatVectorFormat=ES814ScalarQuantizedVectorsFormat"; assertTrue(knnVectorsFormat.toString().startsWith(expectedStr)); } From 3e46a346565eea5b93f19cf21d18dc5946bf100f Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 26 Sep 2025 10:53:48 -0400 Subject: [PATCH 092/109] Update docs/changelog/135545.yaml --- docs/changelog/135545.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/135545.yaml diff --git a/docs/changelog/135545.yaml b/docs/changelog/135545.yaml new file mode 100644 index 0000000000000..252c830b85e05 --- /dev/null +++ b/docs/changelog/135545.yaml @@ -0,0 +1,5 @@ +pr: 135545 +summary: Add GPUPlugin for vector indices on GPU +area: Vector Search +type: feature +issues: [] From 2cd1224d417c16cb07253e1a4e703bbf9791c5a7 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Fri, 26 Sep 2025 11:08:11 -0400 Subject: [PATCH 093/109] Add cuvs-java artifact to a custom GCS bucket --- gradle/verification-metadata.xml | 9 +++------ x-pack/plugin/gpu/build.gradle | 6 ++++-- ...astic-cuvs-java-LICENSE.txt => cuvs-java-LICENSE.txt} | 0 ...elastic-cuvs-java-NOTICE.txt => cuvs-java-NOTICE.txt} | 0 4 files changed, 7 insertions(+), 8 deletions(-) rename x-pack/plugin/gpu/licenses/{elastic-cuvs-java-LICENSE.txt => cuvs-java-LICENSE.txt} (100%) rename x-pack/plugin/gpu/licenses/{elastic-cuvs-java-NOTICE.txt => cuvs-java-NOTICE.txt} (100%) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index f888aab908bb4..8682328967634 100644 --- a/gradle/verification-metadata.xml +++
b/gradle/verification-metadata.xml @@ -15,9 +15,6 @@ - - - @@ -1197,9 +1194,9 @@
- - - + + + diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle index 1ea64f1f1e73d..3b9330371fc47 100644 --- a/x-pack/plugin/gpu/build.gradle +++ b/x-pack/plugin/gpu/build.gradle @@ -14,13 +14,15 @@ base { } repositories { - mavenLocal() + maven { + url = uri("https://storage.googleapis.com/elasticsearch-cuvs-snapshots") + } } dependencies { compileOnly project(path: xpackModule('core')) compileOnly project(':server') - implementation('com.nvidia.cuvs:elastic-cuvs-java:25.10.0') { + implementation('com.nvidia.cuvs:cuvs-java:25.10.0') { changing = true // Ensure that we get updates even when the version number doesn't change. We can remove this once things stabilize } testImplementation(testArtifact(project(xpackModule('core')))) diff --git a/x-pack/plugin/gpu/licenses/elastic-cuvs-java-LICENSE.txt b/x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt similarity index 100% rename from x-pack/plugin/gpu/licenses/elastic-cuvs-java-LICENSE.txt rename to x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt diff --git a/x-pack/plugin/gpu/licenses/elastic-cuvs-java-NOTICE.txt b/x-pack/plugin/gpu/licenses/cuvs-java-NOTICE.txt similarity index 100% rename from x-pack/plugin/gpu/licenses/elastic-cuvs-java-NOTICE.txt rename to x-pack/plugin/gpu/licenses/cuvs-java-NOTICE.txt From 9d926f20dc4dbb56f0e3fbf19d08fd54a7aac585 Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Fri, 26 Sep 2025 11:14:59 -0400 Subject: [PATCH 094/109] Fix checksum --- gradle/verification-metadata.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 495faa4300e3e..5b5c9aea68ac3 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -1201,7 +1201,7 @@ - + From 2f07c5ab55318a873d9f1e2e7e595fd65625e41c Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Fri, 26 Sep 2025 11:27:03 -0400 Subject: [PATCH 095/109] Re-enable regular CI steps in PRs --- 
.buildkite/pipelines/pull-request/.defaults.yml | 1 - .buildkite/pipelines/pull-request/bwc-snapshots.yml | 1 - .../pipelines/pull-request/packaging-tests-unix-sample.yml | 1 - .../pipelines/pull-request/packaging-tests-windows-sample.yml | 1 - 4 files changed, 4 deletions(-) diff --git a/.buildkite/pipelines/pull-request/.defaults.yml b/.buildkite/pipelines/pull-request/.defaults.yml index f0f87ce6c86b4..a0c82d9ecdded 100644 --- a/.buildkite/pipelines/pull-request/.defaults.yml +++ b/.buildkite/pipelines/pull-request/.defaults.yml @@ -1,7 +1,6 @@ config: skip-labels: - ">test-mute" - - test-gpu # TODO remove this once the non-gpu steps work correctly excluded-regions: - ^docs/.* - ^x-pack/docs/.* diff --git a/.buildkite/pipelines/pull-request/bwc-snapshots.yml b/.buildkite/pipelines/pull-request/bwc-snapshots.yml index e665398431286..961f6ae6d612e 100644 --- a/.buildkite/pipelines/pull-request/bwc-snapshots.yml +++ b/.buildkite/pipelines/pull-request/bwc-snapshots.yml @@ -2,7 +2,6 @@ config: trigger-phrase: '.*run\W+elasticsearch-ci/bwc.*' skip-labels: - ">test-mute" - - test-gpu # TODO remove this once the non-gpu steps work correctly - "test-full-bwc" steps: - group: bwc-snapshots diff --git a/.buildkite/pipelines/pull-request/packaging-tests-unix-sample.yml b/.buildkite/pipelines/pull-request/packaging-tests-unix-sample.yml index d11b3644baa96..b1b619cc833f3 100644 --- a/.buildkite/pipelines/pull-request/packaging-tests-unix-sample.yml +++ b/.buildkite/pipelines/pull-request/packaging-tests-unix-sample.yml @@ -1,7 +1,6 @@ config: skip-labels: - ">test-mute" - - test-gpu # TODO remove this once the non-gpu steps work correctly - ":Delivery/Packaging" steps: - group: packaging-tests-unix-sample diff --git a/.buildkite/pipelines/pull-request/packaging-tests-windows-sample.yml b/.buildkite/pipelines/pull-request/packaging-tests-windows-sample.yml index 0cecd72b9cbb5..6e8fec65f75a5 100644 --- a/.buildkite/pipelines/pull-request/packaging-tests-windows-sample.yml +++ 
b/.buildkite/pipelines/pull-request/packaging-tests-windows-sample.yml @@ -1,7 +1,6 @@ config: skip-labels: - ">test-mute" - - test-gpu # TODO remove this once the non-gpu steps work correctly - ":Delivery/Packaging" steps: - group: packaging-tests-windows-sample From b9bf617b163346268e4117eb6ea54982d08d3d1b Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Fri, 26 Sep 2025 12:16:06 -0400 Subject: [PATCH 096/109] Add GCS bucket repo where needed --- qa/vector/build.gradle | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qa/vector/build.gradle b/qa/vector/build.gradle index 263312fa6688e..b0223791797dd 100644 --- a/qa/vector/build.gradle +++ b/qa/vector/build.gradle @@ -24,6 +24,9 @@ tasks.named('forbiddenApisMain').configure { } repositories { mavenLocal() + maven { + url = uri("https://storage.googleapis.com/elasticsearch-cuvs-snapshots") + } } dependencies { From 5301950c51915811c1b1a628bd3c73c66a3c130e Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 26 Sep 2025 13:56:56 -0400 Subject: [PATCH 097/109] Small cleanups --- .../src/main/java/org/elasticsearch/index/IndexSettings.java | 2 +- .../codec/vectors/ES814ScalarQuantizedVectorsFormat.java | 2 +- .../org/elasticsearch/index/store/LuceneFilesExtensions.java | 5 +---- ...FormatTests.java => ES92GpuHnswSQVectorsFormatTests.java} | 2 +- ...rsFormatTests.java => ES92GpuHnswVectorsFormatTests.java} | 2 +- 5 files changed, 5 insertions(+), 8 deletions(-) rename x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/{ESGpuHnswSQVectorsFormatTests.java => ES92GpuHnswSQVectorsFormatTests.java} (96%) rename x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/{ESGpuHnswVectorsFormatTests.java => ES92GpuHnswVectorsFormatTests.java} (96%) diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 4652e7bba9a81..6d14483dd6064 100644 --- 
a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -861,7 +861,7 @@ public enum GpuMode { * Setting to control whether to use GPU for vectors indexing. * Currently only applicable for index_options.type: hnsw. * - * If unset or "auto", an automatic decision is made based on the presence of GPU, necessary libraries, vectors' count and dims. + * If unset or "auto", an automatic decision is made based on the presence of GPU, necessary libraries, vectors' index type. * If set to true, GPU must be used for vectors indexing, and if GPU or necessary libraries are not available, * an exception will be thrown. * If set to false, GPU will not be used for vectors indexing. diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java index a4108a1d9ac6b..5925e81091238 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java @@ -132,7 +132,7 @@ public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException public static final class ES814ScalarQuantizedVectorsWriter extends FlatVectorsWriter { - public final Lucene99ScalarQuantizedVectorsWriter delegate; + final Lucene99ScalarQuantizedVectorsWriter delegate; ES814ScalarQuantizedVectorsWriter(Lucene99ScalarQuantizedVectorsWriter delegate) { super(delegate.getFlatVectorScorer()); diff --git a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java index 8f310febf450e..522db5bb7bbbe 100644 --- a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java +++ 
b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java @@ -88,10 +88,7 @@ public enum LuceneFilesExtensions { // ivf vectors format MIVF("mivf", "IVF Metadata", true, false), CENIVF("cenivf", "IVF Centroid Data", false, true), - CLIVF("clivf", "IVF Cluster Data", false, true), - // GPU vectors format - GPUIDX("gpuidx", "GPU Vector Index", false, true), - MGPU("mgpu", "GPU Vector Metadata", true, false); + CLIVF("clivf", "IVF Cluster Data", false, true); /** * Allow plugin developers of custom codecs to opt out of the assertion in {@link #fromExtension} diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormatTests.java similarity index 96% rename from x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java rename to x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormatTests.java index c7547c1697cc1..f1c13b15795c5 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswSQVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormatTests.java @@ -17,7 +17,7 @@ import org.junit.BeforeClass; @LuceneTestCase.SuppressSysoutChecks(bugUrl = "https://github.com/rapidsai/cuvs/issues/1310") -public class ESGpuHnswSQVectorsFormatTests extends BaseKnnVectorsFormatTestCase { +public class ES92GpuHnswSQVectorsFormatTests extends BaseKnnVectorsFormatTestCase { static { LogConfigurator.loadLog4jPlugins(); diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormatTests.java similarity index 96% rename from 
x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java rename to x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormatTests.java index 7436e13f8acc9..e7ce310d15d9b 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ESGpuHnswVectorsFormatTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormatTests.java @@ -18,7 +18,7 @@ // CuVS prints tons of logs to stdout @LuceneTestCase.SuppressSysoutChecks(bugUrl = "https://github.com/rapidsai/cuvs/issues/1310") -public class ESGpuHnswVectorsFormatTests extends BaseKnnVectorsFormatTestCase { +public class ES92GpuHnswVectorsFormatTests extends BaseKnnVectorsFormatTestCase { static { LogConfigurator.loadLog4jPlugins(); From 304bc6837e5441ed5d44a82354a1e969c3981b66 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 26 Sep 2025 16:07:37 -0400 Subject: [PATCH 098/109] Introduce feature flag "vectors_indexing_use_gpu" in IndexSettings instead of previous GPUPlugin to disable this index settings as well. 
--- .../common/settings/IndexScopedSettings.java | 4 +++- .../org/elasticsearch/index/IndexSettings.java | 2 ++ .../plugin/gpu/GPUPluginInitializationIT.java | 18 +++++++++--------- .../org/elasticsearch/xpack/gpu/GPUPlugin.java | 5 +---- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 008e564e57640..74cb8ea58b004 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -243,7 +243,9 @@ public final class IndexScopedSettings extends AbstractScopedSettings { settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER); } settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING); - settings.add(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING); + if (IndexSettings.VECTORS_INDEXING_USE_GPU) { + settings.add(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING); + } BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings); }; diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 6d14483dd6064..abb8aee017804 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -857,6 +857,8 @@ public enum GpuMode { AUTO } + public static final boolean VECTORS_INDEXING_USE_GPU = new FeatureFlag("vectors_indexing_use_gpu").isEnabled(); + /** * Setting to control whether to use GPU for vectors indexing. * Currently only applicable for index_options.type: hnsw. 
diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java index 322bf286feeac..01731a6adc68d 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java @@ -102,7 +102,7 @@ public void disableMock() { } public void testFFOff() { - assumeFalse("GPU_FORMAT feature flag disabled", GPUPlugin.GPU_FORMAT.isEnabled()); + assumeFalse("GPU_FORMAT feature flag disabled", IndexSettings.VECTORS_INDEXING_USE_GPU); GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); @@ -112,7 +112,7 @@ public void testFFOff() { } public void testIndexSettingOnIndexTypeSupportedGPUSupported() { - assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -130,7 +130,7 @@ public void testIndexSettingOnIndexTypeSupportedGPUSupported() { } public void testIndexSettingOnIndexTypeNotSupportedThrows() { - assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -148,7 +148,7 @@ public void testIndexSettingOnIndexTypeNotSupportedThrows() { } public void testIndexSettingOnGPUNotSupportedThrows() { - assumeTrue("GPU_FORMAT feature flag enabled", 
GPUPlugin.GPU_FORMAT.isEnabled()); + assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); TestCuVSServiceProvider.mockedGPUInfoProvider = NO_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -169,7 +169,7 @@ public void testIndexSettingOnGPUNotSupportedThrows() { } public void testIndexSettingOnGPUSupportThrowsRethrows() { - assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); // Mocks a cuvs-java UnsupportedProvider TestCuVSServiceProvider.mockedGPUInfoProvider = p -> { throw new UnsupportedOperationException("cuvs-java UnsupportedProvider"); }; @@ -191,7 +191,7 @@ public void testIndexSettingOnGPUSupportThrowsRethrows() { } public void testIndexSettingAutoIndexTypeSupportedGPUSupported() { - assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -209,7 +209,7 @@ public void testIndexSettingAutoIndexTypeSupportedGPUSupported() { } public void testIndexSettingAutoGPUNotSupported() { - assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); TestCuVSServiceProvider.mockedGPUInfoProvider = NO_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -227,7 +227,7 @@ public void testIndexSettingAutoGPUNotSupported() { } public void testIndexSettingAutoIndexTypeNotSupported() { - assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); 
TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -245,7 +245,7 @@ public void testIndexSettingAutoIndexTypeNotSupported() { } public void testIndexSettingOff() { - assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index 67428851c2cb8..f138069c97b0d 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -8,7 +8,6 @@ import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.util.hnsw.HnswGraphBuilder; -import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; @@ -19,12 +18,10 @@ public class GPUPlugin extends Plugin implements MapperPlugin { - public static final FeatureFlag GPU_FORMAT = new FeatureFlag("gpu_format"); - @Override public VectorsFormatProvider getVectorsFormatProvider() { return (indexSettings, indexOptions) -> { - if (GPU_FORMAT.isEnabled()) { + if (IndexSettings.VECTORS_INDEXING_USE_GPU) { IndexSettings.GpuMode gpuMode = indexSettings.getValue(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING); if (gpuMode == IndexSettings.GpuMode.TRUE) { if (vectorIndexTypeSupported(indexOptions.getType()) == false) { From c410d92db761b0df295d53bf917dd37d32b1b222 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" 
<150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Sat, 27 Sep 2025 04:32:03 +0000 Subject: [PATCH 099/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 8eea338605f26..4083bb9805db7 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -e32b14db +172c023d From d1481d7610926231a9d3ce0ca58c29aa9ba9b905 Mon Sep 17 00:00:00 2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Sun, 28 Sep 2025 04:31:48 +0000 Subject: [PATCH 100/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 4083bb9805db7..806e86639af24 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -172c023d +91d72b17 From ae5cc15650a1d5577aaede2cfadf46b1ac2d2b2c Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Sun, 28 Sep 2025 08:43:38 -0400 Subject: [PATCH 101/109] GPUPlugin to implement InternalVectorFormatProviderPlugin instead of previous MapperPlugin --- server/src/main/java/module-info.java | 3 ++- .../elasticsearch/indices/IndicesModule.java | 19 +++++++++++----- .../elasticsearch/node/NodeConstruction.java | 7 +++++- .../elasticsearch/plugins/MapperPlugin.java | 8 ------- .../InternalVectorFormatProviderPlugin.java | 22 +++++++++++++++++++ .../query/SearchExecutionContextTests.java | 2 +- 
.../index/mapper/MapperServiceTestCase.java | 5 +++++ .../elasticsearch/xpack/gpu/GPUPlugin.java | 4 ++-- 8 files changed, 51 insertions(+), 19 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/plugins/internal/InternalVectorFormatProviderPlugin.java diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 2e80a65a570d5..7db4506d406d7 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -400,7 +400,8 @@ org.elasticsearch.settings.secure, org.elasticsearch.serverless.constants, org.elasticsearch.serverless.apifiltering, - org.elasticsearch.internal.security; + org.elasticsearch.internal.security, + org.elasticsearch.gpu; exports org.elasticsearch.telemetry.tracing; exports org.elasticsearch.telemetry; diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java index 05bbeb59a136f..ad1608bfdff27 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -79,6 +79,7 @@ import org.elasticsearch.injection.guice.AbstractModule; import org.elasticsearch.plugins.FieldPredicate; import org.elasticsearch.plugins.MapperPlugin; +import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin; import org.elasticsearch.xcontent.NamedXContentRegistry; import org.elasticsearch.xcontent.ParseField; @@ -97,19 +98,23 @@ public class IndicesModule extends AbstractModule { private final MapperRegistry mapperRegistry; - public IndicesModule(List mapperPlugins, RootObjectMapperNamespaceValidator namespaceValidator) { + public IndicesModule( + List mapperPlugins, + List vectorFormatProviderPlugins, + RootObjectMapperNamespaceValidator namespaceValidator + ) { this.mapperRegistry = new MapperRegistry( getMappers(mapperPlugins), getRuntimeFields(mapperPlugins), getMetadataMappers(mapperPlugins), 
getFieldFilter(mapperPlugins), - getVectorFormatProviders(mapperPlugins), + getVectorFormatProviders(vectorFormatProviderPlugins), namespaceValidator ); } public IndicesModule(List mapperPlugins) { - this(mapperPlugins, null); + this(mapperPlugins, Collections.emptyList(), null); } public static List getNamedWriteables() { @@ -230,10 +235,12 @@ public static Map getMappers(List mappe return Collections.unmodifiableMap(mappers); } - private static List getVectorFormatProviders(List mapperPlugins) { + private static List getVectorFormatProviders( + List vectorFormatProviderPlugins + ) { List vectorsFormatProviders = new ArrayList<>(); - for (MapperPlugin mapperPlugin : mapperPlugins) { - VectorsFormatProvider vectorsFormatProvider = mapperPlugin.getVectorsFormatProvider(); + for (InternalVectorFormatProviderPlugin plugin : vectorFormatProviderPlugins) { + VectorsFormatProvider vectorsFormatProvider = plugin.getVectorsFormatProvider(); if (vectorsFormatProvider != null) { vectorsFormatProviders.add(vectorsFormatProvider); } diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index 548ee6f4da22e..7a598475fc456 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -185,6 +185,7 @@ import org.elasticsearch.plugins.TelemetryPlugin; import org.elasticsearch.plugins.internal.DocumentParsingProvider; import org.elasticsearch.plugins.internal.DocumentParsingProviderPlugin; +import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin; import org.elasticsearch.plugins.internal.LoggingDataProvider; import org.elasticsearch.plugins.internal.ReloadAwarePlugin; import org.elasticsearch.plugins.internal.RestExtension; @@ -822,7 +823,11 @@ private void construct( )::onNewInfo ); - IndicesModule indicesModule = new 
IndicesModule(pluginsService.filterPlugins(MapperPlugin.class).toList(), namespaceValidator); + IndicesModule indicesModule = new IndicesModule( + pluginsService.filterPlugins(MapperPlugin.class).toList(), + pluginsService.filterPlugins(InternalVectorFormatProviderPlugin.class).toList(), + namespaceValidator + ); modules.add(indicesModule); modules.add(new GatewayModule()); diff --git a/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java b/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java index 9f27427252d90..c82bc286a90c8 100644 --- a/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java +++ b/server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java @@ -12,7 +12,6 @@ import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MetadataFieldMapper; import org.elasticsearch.index.mapper.RuntimeField; -import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import java.util.Collections; import java.util.Map; @@ -66,13 +65,6 @@ default Function getFieldFilter() { return NOOP_FIELD_FILTER; } - /** - * Returns {VectorFormatProvider} implementations added by this plugin. - */ - default VectorsFormatProvider getVectorsFormatProvider() { - return null; - } - /** * The default field filter applied, which doesn't filter anything. That means that by default get mappings, get index * get field mappings and field capabilities API will return every field that's present in the mappings. diff --git a/server/src/main/java/org/elasticsearch/plugins/internal/InternalVectorFormatProviderPlugin.java b/server/src/main/java/org/elasticsearch/plugins/internal/InternalVectorFormatProviderPlugin.java new file mode 100644 index 0000000000000..84b3d964fd2ba --- /dev/null +++ b/server/src/main/java/org/elasticsearch/plugins/internal/InternalVectorFormatProviderPlugin.java @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.plugins.internal; + +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; + +public interface InternalVectorFormatProviderPlugin { + + /** + * Returns {VectorFormatProvider} implementations added by this plugin. + */ + default VectorsFormatProvider getVectorsFormatProvider() { + return null; + } +} diff --git a/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java b/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java index 43915458ebcae..9c7f9a6d1e35e 100644 --- a/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java @@ -630,7 +630,7 @@ private static MapperService createMapperServiceWithNamespaceValidator( RootObjectMapperNamespaceValidator namespaceValidator ) { IndexAnalyzers indexAnalyzers = IndexAnalyzers.of(singletonMap("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, null))); - IndicesModule indicesModule = new IndicesModule(Collections.emptyList(), namespaceValidator); + IndicesModule indicesModule = new IndicesModule(Collections.emptyList(), Collections.emptyList(), namespaceValidator); MapperRegistry mapperRegistry = indicesModule.getMapperRegistry(); Supplier searchExecutionContextSupplier = () -> { throw new UnsupportedOperationException(); }; MapperService mapperService = mock(MapperService.class); diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java 
b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index 4261e5845f3a8..8ccab79c1c6ae 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -63,6 +63,7 @@ import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.TelemetryPlugin; +import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptCompiler; import org.elasticsearch.script.ScriptContext; @@ -312,6 +313,10 @@ public MapperService build() { SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of()); MapperRegistry mapperRegistry = new IndicesModule( plugins.stream().filter(p -> p instanceof MapperPlugin).map(p -> (MapperPlugin) p).collect(toList()), + plugins.stream() + .filter(p -> p instanceof InternalVectorFormatProviderPlugin) + .map(p -> (InternalVectorFormatProviderPlugin) p) + .collect(toList()), namespaceValidator ).getMapperRegistry(); diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index f138069c97b0d..619eedf6c2270 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -11,12 +11,12 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; -import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin; import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat; 
import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat; -public class GPUPlugin extends Plugin implements MapperPlugin { +public class GPUPlugin extends Plugin implements InternalVectorFormatProviderPlugin { @Override public VectorsFormatProvider getVectorsFormatProvider() { From 70ae1cdd62e98192dbe3e6674bdadccc6d4df7dc Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Sun, 28 Sep 2025 08:50:22 -0400 Subject: [PATCH 102/109] Change license to Lucene --- .../codec/MergedQuantizedVectorValues.java | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java index 32355723bde5e..4d3d5013dd381 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java @@ -1,8 +1,21 @@ /* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2025 Elasticsearch B.V. */ package org.elasticsearch.xpack.gpu.codec; From 55ee270fa26de13d607bfb12b872257d53dbefc0 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Sun, 28 Sep 2025 12:28:19 -0400 Subject: [PATCH 103/109] Switch to group varint when writing vectorIndex --- .../xpack/gpu/codec/ES92GpuHnswVectorsFormat.java | 4 ++-- .../xpack/gpu/codec/ES92GpuHnswVectorsWriter.java | 13 ++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java index fe7a70f451b80..47fcb39056aee 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java @@ -28,13 +28,13 @@ */ public class ES92GpuHnswVectorsFormat extends KnnVectorsFormat { public static final String NAME = "ES92GpuHnswVectorsFormat"; - public static final int VERSION_START = 0; + public static final int VERSION_GROUPVARINT = 1; static final String LUCENE99_HNSW_META_CODEC_NAME = "Lucene99HnswVectorsFormatMeta"; static final String LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME = "Lucene99HnswVectorsFormatIndex"; static final String LUCENE99_HNSW_META_EXTENSION = "vem"; static final String LUCENE99_HNSW_VECTOR_INDEX_EXTENSION = "vex"; - static final int LUCENE99_VERSION_CURRENT = VERSION_START; + static final int 
LUCENE99_VERSION_CURRENT = VERSION_GROUPVARINT; static final int DEFAULT_MAX_CONN = 16; // graph degree public static final int DEFAULT_BEAM_WIDTH = 128; // intermediate graph degree diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java index a36254e910721..f848f715f913b 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java @@ -343,7 +343,10 @@ private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) th long offsetStart = vectorIndex.getFilePointer(); Arrays.sort(neighbors); int actualSize = 0; - scratch[actualSize++] = neighbors[0]; + if (maxGraphDegree > 0) { + scratch[0] = neighbors[0]; + actualSize = 1; + } for (int i = 1; i < maxGraphDegree; i++) { assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount; if (neighbors[i - 1] == neighbors[i]) { @@ -353,9 +356,7 @@ private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) th } // Write the size after duplicates are removed vectorIndex.writeVInt(actualSize); - for (int i = 0; i < actualSize; i++) { - vectorIndex.writeVInt(scratch[i]); - } + vectorIndex.writeGroupVInts(scratch, actualSize); levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); } if (logger.isDebugEnabled()) { @@ -387,9 +388,7 @@ private HnswGraph writeMockGraph(int elementCount, int[][] levelNodeOffsets) thr long offsetStart = vectorIndex.getFilePointer(); vectorIndex.writeVInt(nodeDegree); - for (int i = 0; i < nodeDegree; i++) { - vectorIndex.writeVInt(scratch[i]); - } + vectorIndex.writeGroupVInts(scratch, nodeDegree); levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); } return 
createMockGraph(elementCount, nodeDegree); From 3cb4b71d4cf0215e4ac375a08e8da532ffd207e5 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Mon, 29 Sep 2025 16:04:43 -0400 Subject: [PATCH 104/109] Revert "Introduce feature flag "vectors_indexing_use_gpu" in IndexSettings" This reverts commit 304bc6837e5441ed5d44a82354a1e969c3981b66. --- .../common/settings/IndexScopedSettings.java | 4 +--- .../org/elasticsearch/index/IndexSettings.java | 2 -- .../plugin/gpu/GPUPluginInitializationIT.java | 18 +++++++++--------- .../org/elasticsearch/xpack/gpu/GPUPlugin.java | 5 ++++- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 74cb8ea58b004..008e564e57640 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -243,9 +243,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER); } settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING); - if (IndexSettings.VECTORS_INDEXING_USE_GPU) { - settings.add(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING); - } + settings.add(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING); BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings); }; diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index abb8aee017804..6d14483dd6064 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -857,8 +857,6 @@ public enum GpuMode { AUTO } - public static final boolean VECTORS_INDEXING_USE_GPU = new FeatureFlag("vectors_indexing_use_gpu").isEnabled(); - /** * Setting to control 
whether to use GPU for vectors indexing. * Currently only applicable for index_options.type: hnsw. diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java index 01731a6adc68d..322bf286feeac 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java @@ -102,7 +102,7 @@ public void disableMock() { } public void testFFOff() { - assumeFalse("GPU_FORMAT feature flag disabled", IndexSettings.VECTORS_INDEXING_USE_GPU); + assumeFalse("GPU_FORMAT feature flag disabled", GPUPlugin.GPU_FORMAT.isEnabled()); GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); @@ -112,7 +112,7 @@ public void testFFOff() { } public void testIndexSettingOnIndexTypeSupportedGPUSupported() { - assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -130,7 +130,7 @@ public void testIndexSettingOnIndexTypeSupportedGPUSupported() { } public void testIndexSettingOnIndexTypeNotSupportedThrows() { - assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -148,7 +148,7 @@ public void testIndexSettingOnIndexTypeNotSupportedThrows() { } public void 
testIndexSettingOnGPUNotSupportedThrows() { - assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); TestCuVSServiceProvider.mockedGPUInfoProvider = NO_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -169,7 +169,7 @@ public void testIndexSettingOnGPUNotSupportedThrows() { } public void testIndexSettingOnGPUSupportThrowsRethrows() { - assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); // Mocks a cuvs-java UnsupportedProvider TestCuVSServiceProvider.mockedGPUInfoProvider = p -> { throw new UnsupportedOperationException("cuvs-java UnsupportedProvider"); }; @@ -191,7 +191,7 @@ public void testIndexSettingOnGPUSupportThrowsRethrows() { } public void testIndexSettingAutoIndexTypeSupportedGPUSupported() { - assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -209,7 +209,7 @@ public void testIndexSettingAutoIndexTypeSupportedGPUSupported() { } public void testIndexSettingAutoGPUNotSupported() { - assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); TestCuVSServiceProvider.mockedGPUInfoProvider = NO_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -227,7 +227,7 @@ public void testIndexSettingAutoGPUNotSupported() { } public void testIndexSettingAutoIndexTypeNotSupported() { - assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); + assumeTrue("GPU_FORMAT feature 
flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); @@ -245,7 +245,7 @@ public void testIndexSettingAutoIndexTypeNotSupported() { } public void testIndexSettingOff() { - assumeTrue("GPU_FORMAT feature flag enabled", IndexSettings.VECTORS_INDEXING_USE_GPU); + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index 619eedf6c2270..0a83e85e4ce1a 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -8,6 +8,7 @@ import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.util.hnsw.HnswGraphBuilder; +import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; @@ -18,10 +19,12 @@ public class GPUPlugin extends Plugin implements InternalVectorFormatProviderPlugin { + public static final FeatureFlag GPU_FORMAT = new FeatureFlag("gpu_format"); + @Override public VectorsFormatProvider getVectorsFormatProvider() { return (indexSettings, indexOptions) -> { - if (IndexSettings.VECTORS_INDEXING_USE_GPU) { + if (GPU_FORMAT.isEnabled()) { IndexSettings.GpuMode gpuMode = indexSettings.getValue(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING); if (gpuMode == IndexSettings.GpuMode.TRUE) { if (vectorIndexTypeSupported(indexOptions.getType()) == false) { From 2d51f5a3ae2edb8ccd083e357044bb4839cb3e24 Mon Sep 17 
00:00:00 2001 From: Mayya Sharipova Date: Mon, 29 Sep 2025 16:05:02 -0400 Subject: [PATCH 105/109] Move "index.vectors.indexing.use_gpu" setting under GPUPlugin --- .../common/settings/IndexScopedSettings.java | 1 - .../elasticsearch/index/IndexSettings.java | 47 ------------ .../index/IndexSettingsTests.java | 32 -------- .../action/TransportResumeFollowAction.java | 1 - .../plugin/gpu/GPUPluginInitializationIT.java | 73 ++++++++----------- .../elasticsearch/xpack/gpu/GPUPlugin.java | 47 ++++++++++-- 6 files changed, 73 insertions(+), 128 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 008e564e57640..a0a28e9322956 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -243,7 +243,6 @@ public final class IndexScopedSettings extends AbstractScopedSettings { settings.add(IndexSettings.USE_DOC_VALUES_SKIPPER); } settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING); - settings.add(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING); BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings); }; diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 6d14483dd6064..e9fee0a4dacc8 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -848,32 +848,6 @@ private static int getIgnoreAboveDefaultValue(final Settings settings) { Property.ServerlessPublic ); - /** - * An enum for the tri-state value of the `index.vectors.indexing.use_gpu` setting. - */ - public enum GpuMode { - TRUE, - FALSE, - AUTO - } - - /** - * Setting to control whether to use GPU for vectors indexing. 
- * Currently only applicable for index_options.type: hnsw. - * - * If unset or "auto", an automatic decision is made based on the presence of GPU, necessary libraries, vectors' index type. - * If set to true, GPU must be used for vectors indexing, and if GPU or necessary libraries are not available, - * an exception will be thrown. - * If set to false, GPU will not be used for vectors indexing. - */ - public static final Setting VECTORS_INDEXING_USE_GPU_SETTING = Setting.enumSetting( - GpuMode.class, - "index.vectors.indexing.use_gpu", - GpuMode.AUTO, - Property.IndexScope, - Property.Dynamic - ); - private final Index index; private final IndexVersion version; private final Logger logger; @@ -972,8 +946,6 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) { */ private volatile int maxSlicesPerScroll; - private volatile GpuMode useGpuForVectorsIndexing; - /** * The maximum length of regex string allowed in a regexp query. */ @@ -1149,7 +1121,6 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti && scopedSettings.get(RECOVERY_USE_SYNTHETIC_SOURCE_SETTING); useDocValuesSkipper = DOC_VALUES_SKIPPER && scopedSettings.get(USE_DOC_VALUES_SKIPPER); seqNoIndexOptions = scopedSettings.get(SEQ_NO_INDEX_OPTIONS_SETTING); - this.useGpuForVectorsIndexing = scopedSettings.get(VECTORS_INDEXING_USE_GPU_SETTING); if (recoverySourceSyntheticEnabled) { if (DiscoveryNode.isStateless(settings)) { throw new IllegalArgumentException("synthetic recovery source is only allowed in stateful"); @@ -1172,7 +1143,6 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti } } - scopedSettings.addSettingsUpdateConsumer(VECTORS_INDEXING_USE_GPU_SETTING, this::setUseGpuForVectorsIndexing); scopedSettings.addSettingsUpdateConsumer( MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING, mergePolicyConfig::setCompoundFormatThreshold @@ -1899,23 +1869,6 @@ private void setHnswEarlyTermination(boolean earlyTermination) { 
this.earlyTermination = earlyTermination; } - private void setUseGpuForVectorsIndexing(GpuMode useGpuForVectorsIndexing) { - this.useGpuForVectorsIndexing = useGpuForVectorsIndexing; - } - - /** - * Whether to use GPU for vectors indexing. - * Currently only applicable for index_options.type: hnsw - * - * @return GpuMode.TRUE if GPU must be used for vectors indexing; - * GpuMode.FALSE if GPU will not be used, or - * GpuMode.AUTO if the setting is not set, - * meaning automatic decision is maded on the presence of GPU, libraries, vectors' count and dims. - */ - public GpuMode useGpuForVectorsIndexing() { - return useGpuForVectorsIndexing; - } - public SeqNoFieldMapper.SeqNoIndexOptions seqNoIndexOptions() { return seqNoIndexOptions; } diff --git a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java index 254ce0335418e..ea18734190d7f 100644 --- a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java +++ b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java @@ -922,36 +922,4 @@ public void testSame() { assertTrue(IndexSettings.same(settings, differentOtherSettingBuilder.build())); } - public void testVectorsUseGpuSetting() { - IndexMetadata metadata = newIndexMeta( - "index", - Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()).build() - ); - IndexSettings settings = new IndexSettings(metadata, Settings.EMPTY); - assertEquals(IndexSettings.GpuMode.AUTO, settings.useGpuForVectorsIndexing()); - - settings.updateIndexMetadata( - newIndexMeta("index", Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), true).build()) - ); - assertEquals(IndexSettings.GpuMode.TRUE, settings.useGpuForVectorsIndexing()); - - settings.updateIndexMetadata( - newIndexMeta("index", Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), false).build()) - ); - 
assertEquals(IndexSettings.GpuMode.FALSE, settings.useGpuForVectorsIndexing()); - - settings.updateIndexMetadata(newIndexMeta("index", Settings.EMPTY)); - assertEquals(IndexSettings.GpuMode.AUTO, settings.useGpuForVectorsIndexing()); - - IllegalArgumentException e = expectThrows( - IllegalArgumentException.class, - () -> settings.updateIndexMetadata( - newIndexMeta("index", Settings.builder().put("index.vectors.indexing.use_gpu", "unknown").build()) - ) - ); - assertThat( - e.getMessage(), - Matchers.containsString("illegal value can't update [index.vectors.indexing.use_gpu] from [AUTO] to [unknown]") - ); - } } diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java index 3bcc5ecb87d24..b0be3e21bbc7c 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java @@ -496,7 +496,6 @@ static String[] extractLeaderShardHistoryUUIDs(Map ccrIndexMetad IndexSettings.INDEX_FLUSH_AFTER_MERGE_THRESHOLD_SIZE_SETTING, IndexSettings.INDEX_GC_DELETES_SETTING, IndexSettings.MAX_REFRESH_LISTENERS_PER_SHARD, - IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING, IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING, BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING, SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_FETCH_DEBUG_SETTING, diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java index 322bf286feeac..c53095900c18d 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java +++ 
b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java @@ -15,9 +15,11 @@ import com.nvidia.cuvs.spi.CuVSServiceProvider; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldTypeTests; import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; +import org.elasticsearch.indices.IndicesService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.xpack.gpu.GPUPlugin; @@ -27,7 +29,6 @@ import java.util.List; import java.util.function.Function; -import static org.elasticsearch.index.IndexSettingsTests.newIndexMeta; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.startsWith; @@ -118,11 +119,8 @@ public void testIndexSettingOnIndexTypeSupportedGPUSupported() { GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); - final var metadata = newIndexMeta( - "index1", - Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.TRUE).build() - ); - final var settings = new IndexSettings(metadata, Settings.EMPTY); + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build()); + IndexSettings settings = getIndexSettings(); final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); @@ -136,11 +134,8 @@ public void testIndexSettingOnIndexTypeNotSupportedThrows() { GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); - final var metadata = newIndexMeta( - "index1", 
- Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.TRUE).build() - ); - final var settings = new IndexSettings(metadata, Settings.EMPTY); + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build()); + IndexSettings settings = getIndexSettings(); final var indexOptions = DenseVectorFieldTypeTests.randomFlatIndexOptions(); var ex = expectThrows(IllegalArgumentException.class, () -> vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions)); @@ -154,11 +149,8 @@ public void testIndexSettingOnGPUNotSupportedThrows() { GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); - final var metadata = newIndexMeta( - "index1", - Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.TRUE).build() - ); - final var settings = new IndexSettings(metadata, Settings.EMPTY); + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build()); + IndexSettings settings = getIndexSettings(); final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); var ex = expectThrows(IllegalArgumentException.class, () -> vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions)); @@ -176,11 +168,8 @@ public void testIndexSettingOnGPUSupportThrowsRethrows() { GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); - final var metadata = newIndexMeta( - "index1", - Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.TRUE).build() - ); - final var settings = new IndexSettings(metadata, Settings.EMPTY); + createIndex("index1", 
Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build()); + IndexSettings settings = getIndexSettings(); final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); var ex = expectThrows(IllegalArgumentException.class, () -> vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions)); @@ -197,11 +186,8 @@ public void testIndexSettingAutoIndexTypeSupportedGPUSupported() { GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); - final var metadata = newIndexMeta( - "index1", - Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.AUTO).build() - ); - final var settings = new IndexSettings(metadata, Settings.EMPTY); + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.AUTO).build()); + IndexSettings settings = getIndexSettings(); final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); @@ -215,11 +201,8 @@ public void testIndexSettingAutoGPUNotSupported() { GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); - final var metadata = newIndexMeta( - "index1", - Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.AUTO).build() - ); - final var settings = new IndexSettings(metadata, Settings.EMPTY); + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.AUTO).build()); + IndexSettings settings = getIndexSettings(); final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); var format = vectorsFormatProvider.getKnnVectorsFormat(settings, 
indexOptions); @@ -233,11 +216,8 @@ public void testIndexSettingAutoIndexTypeNotSupported() { GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); - final var metadata = newIndexMeta( - "index1", - Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.AUTO).build() - ); - final var settings = new IndexSettings(metadata, Settings.EMPTY); + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.AUTO).build()); + IndexSettings settings = getIndexSettings(); final var indexOptions = DenseVectorFieldTypeTests.randomFlatIndexOptions(); var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); @@ -251,14 +231,25 @@ public void testIndexSettingOff() { GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); - final var metadata = newIndexMeta( - "index1", - Settings.builder().put(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), IndexSettings.GpuMode.FALSE).build() - ); - final var settings = new IndexSettings(metadata, Settings.EMPTY); + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.FALSE).build()); + IndexSettings settings = getIndexSettings(); final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); assertNull(format); } + + private IndexSettings getIndexSettings() { + ensureGreen("index1"); + IndexSettings settings = null; + for (IndicesService service : internalCluster().getInstances(IndicesService.class)) { + IndexService indexService = service.indexService(resolveIndex("index1")); + if (indexService != null) { + settings = indexService.getIndexSettings(); + 
break; + } + } + assertNotNull(settings); + return settings; + } } diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index 0a83e85e4ce1a..0f823bd5a4980 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -8,8 +8,8 @@ import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.util.hnsw.HnswGraphBuilder; +import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.util.FeatureFlag; -import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.plugins.Plugin; @@ -17,16 +17,53 @@ import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat; import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat; +import java.util.List; + public class GPUPlugin extends Plugin implements InternalVectorFormatProviderPlugin { public static final FeatureFlag GPU_FORMAT = new FeatureFlag("gpu_format"); + /** + * An enum for the tri-state value of the `index.vectors.indexing.use_gpu` setting. + */ + public enum GpuMode { + TRUE, + FALSE, + AUTO + } + + /** + * Setting to control whether to use GPU for vectors indexing. + * Currently only applicable for index_options.type: hnsw. + * + * If unset or "auto", an automatic decision is made based on the presence of GPU, necessary libraries, vectors' index type. + * If set to true, GPU must be used for vectors indexing, and if GPU or necessary libraries are not available, + * an exception will be thrown. + * If set to false, GPU will not be used for vectors indexing. 
+ */ + public static final Setting VECTORS_INDEXING_USE_GPU_SETTING = Setting.enumSetting( + GpuMode.class, + "index.vectors.indexing.use_gpu", + GpuMode.AUTO, + Setting.Property.IndexScope, + Setting.Property.Dynamic + ); + + @Override + public List> getSettings() { + if (GPU_FORMAT.isEnabled()) { + return List.of(VECTORS_INDEXING_USE_GPU_SETTING); + } else { + return List.of(VECTORS_INDEXING_USE_GPU_SETTING); + } + } + @Override public VectorsFormatProvider getVectorsFormatProvider() { return (indexSettings, indexOptions) -> { if (GPU_FORMAT.isEnabled()) { - IndexSettings.GpuMode gpuMode = indexSettings.getValue(IndexSettings.VECTORS_INDEXING_USE_GPU_SETTING); - if (gpuMode == IndexSettings.GpuMode.TRUE) { + GpuMode gpuMode = indexSettings.getValue(VECTORS_INDEXING_USE_GPU_SETTING); + if (gpuMode == GpuMode.TRUE) { if (vectorIndexTypeSupported(indexOptions.getType()) == false) { throw new IllegalArgumentException( "[index.vectors.indexing.use_gpu] doesn't support [index_options.type] of [" + indexOptions.getType() + "]." 
@@ -39,9 +76,7 @@ public VectorsFormatProvider getVectorsFormatProvider() { } return getVectorsFormat(indexOptions); } - if (gpuMode == IndexSettings.GpuMode.AUTO - && vectorIndexTypeSupported(indexOptions.getType()) - && GPUSupport.isSupported(false)) { + if (gpuMode == GpuMode.AUTO && vectorIndexTypeSupported(indexOptions.getType()) && GPUSupport.isSupported(false)) { return getVectorsFormat(indexOptions); } } From 543fafe30e6f9e1afdfe62e4f36753cd3ddea0aa Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Mon, 29 Sep 2025 17:18:30 -0400 Subject: [PATCH 106/109] Add test when FF is disabled --- .../plugin/gpu/GPUPluginInitializationIT.java | 28 +++++++++++++++++++ .../elasticsearch/xpack/gpu/GPUPlugin.java | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java index c53095900c18d..65d8daf14d31e 100644 --- a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java @@ -29,6 +29,7 @@ import java.util.List; import java.util.function.Function; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.startsWith; @@ -112,6 +113,33 @@ public void testFFOff() { assertNull(format); } + public void testFFOffIndexSettingNotSupported() { + assumeFalse("GPU_FORMAT feature flag disabled", GPUPlugin.GPU_FORMAT.isEnabled()); + IllegalArgumentException exception = expectThrows( + IllegalArgumentException.class, + () -> createIndex( + "index1", + Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build() + ) + ); + assertThat(exception.getMessage(), 
containsString("unknown setting [index.vectors.indexing.use_gpu]")); + } + + public void testFFOffGPUFormatNull() { + assumeFalse("GPU_FORMAT feature flag disabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + createIndex("index1", Settings.EMPTY); + IndexSettings settings = getIndexSettings(); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNull(format); + } + public void testIndexSettingOnIndexTypeSupportedGPUSupported() { assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index 0f823bd5a4980..183b419b64cc0 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -54,7 +54,7 @@ public List> getSettings() { if (GPU_FORMAT.isEnabled()) { return List.of(VECTORS_INDEXING_USE_GPU_SETTING); } else { - return List.of(VECTORS_INDEXING_USE_GPU_SETTING); + return List.of(); } } From 6d843e10b51b1242de956dffafd2ea50923a4924 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Mon, 29 Sep 2025 17:27:30 -0400 Subject: [PATCH 107/109] Rename formats to Lucene99HnswVectorsFormat --- .../xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java | 2 +- .../xpack/gpu/codec/ES92GpuHnswVectorsFormat.java | 2 +- .../xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java index 60c781afa135b..b62766fb39c3a 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java @@ -28,7 +28,7 @@ * HNSW graph is built on GPU, while scalar quantization and search is performed on CPU. */ public class ES92GpuHnswSQVectorsFormat extends KnnVectorsFormat { - public static final String NAME = "ES92GPUHnswScalarQuantizedVectorsFormat"; + public static final String NAME = "Lucene99HnswVectorsFormat"; static final int MAXIMUM_MAX_CONN = 512; static final int MAXIMUM_BEAM_WIDTH = 3200; private final int maxConn; diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java index 47fcb39056aee..8761b9e12f22a 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java @@ -27,7 +27,7 @@ * leverage GPU processing capabilities for vector search operations. 
*/ public class ES92GpuHnswVectorsFormat extends KnnVectorsFormat { - public static final String NAME = "ES92GpuHnswVectorsFormat"; + public static final String NAME = "Lucene99HnswVectorsFormat"; public static final int VERSION_GROUPVARINT = 1; static final String LUCENE99_HNSW_META_CODEC_NAME = "Lucene99HnswVectorsFormatMeta"; diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java index 45903b1786872..2648691d03eec 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java @@ -43,7 +43,7 @@ protected Collection getPlugins() { public void testKnnVectorsFormat() throws IOException { // TODO improve test with custom parameters KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("hnsw"); - String expectedStr = "ES92GpuHnswVectorsFormat(name=ES92GpuHnswVectorsFormat, " + String expectedStr = "Lucene99HnswVectorsFormat(name=Lucene99HnswVectorsFormat, " + "maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)"; assertEquals(expectedStr, knnVectorsFormat.toString()); } @@ -52,7 +52,7 @@ public void testKnnVectorsFormat() throws IOException { public void testKnnQuantizedHNSWVectorsFormat() throws IOException { // TOD improve the test with custom parameters KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("int8_hnsw"); - String expectedStr = "ES92GPUHnswScalarQuantizedVectorsFormat(name=ES92GPUHnswScalarQuantizedVectorsFormat, " + String expectedStr = "Lucene99HnswVectorsFormat(name=Lucene99HnswVectorsFormat, " + "maxConn=16, beamWidth=128, flatVectorFormat=ES814ScalarQuantizedVectorsFormat"; assertTrue(knnVectorsFormat.toString().startsWith(expectedStr)); } From e66aca5122bdc72ff1c7810cecb122737fddf755 Mon Sep 17 00:00:00 
2001 From: "elastic-vault-github-plugin-prod[bot]" <150874479+elastic-vault-github-plugin-prod[bot]@users.noreply.github.com> Date: Tue, 30 Sep 2025 04:51:38 +0000 Subject: [PATCH 108/109] Update cuvs snapshot version to 25.10.0 --- .buildkite/scripts/cuvs-snapshot/current-snapshot-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version index 806e86639af24..3bb6b7db4687c 100644 --- a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -1 +1 @@ -91d72b17 +fdb8bfb8 From f9260843661f40d4e592754046882a7785c3ca3d Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 30 Sep 2025 06:50:09 -0400 Subject: [PATCH 109/109] Rename feature flag to gpu_vectors_indexing --- docs/changelog/135545.yaml | 2 +- server/src/main/java/org/elasticsearch/index/IndexSettings.java | 1 - .../src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/changelog/135545.yaml b/docs/changelog/135545.yaml index 252c830b85e05..bbd87fa047476 100644 --- a/docs/changelog/135545.yaml +++ b/docs/changelog/135545.yaml @@ -1,5 +1,5 @@ pr: 135545 -summary: Add GPUPlugin for vector indeces on GPU +summary: Add GPUPlugin for indexing vectors on GPU area: Vector Search type: feature issues: [] diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index e9fee0a4dacc8..b396e1ca206e3 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -65,7 +65,6 @@ * be called for each settings update. 
*/ public final class IndexSettings { - public static final Setting> DEFAULT_FIELD_SETTING = Setting.stringListSetting( "index.query.default_field", Collections.singletonList("*"), diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java index 183b419b64cc0..62190bc0fb752 100644 --- a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -21,7 +21,7 @@ public class GPUPlugin extends Plugin implements InternalVectorFormatProviderPlugin { - public static final FeatureFlag GPU_FORMAT = new FeatureFlag("gpu_format"); + public static final FeatureFlag GPU_FORMAT = new FeatureFlag("gpu_vectors_indexing"); /** * An enum for the tri-state value of the `index.vectors.indexing.use_gpu` setting.