From 1f685c99b27d8d3e17ab79a5ab83202b201335b7 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Wed, 11 Jun 2025 09:19:53 -0400 Subject: [PATCH 1/2] Fix ivf nodestats impl for getOffHeapByteSize --- .../vectors/DefaultIVFVectorsReader.java | 59 ++----------------- 1 file changed, 6 insertions(+), 53 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java index 36a7c2084a4a5..ab8ad21674177 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java @@ -11,7 +11,6 @@ import org.apache.lucene.codecs.hnsw.FlatVectorsReader; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.KnnCollector; @@ -20,10 +19,12 @@ import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.hnsw.NeighborQueue; import org.apache.lucene.util.quantization.OptimizedScalarQuantizer; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import org.elasticsearch.simdvec.ES91OSQVectorsScorer; import org.elasticsearch.simdvec.ESVectorUtil; import java.io.IOException; +import java.util.Map; import java.util.function.IntPredicate; import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.QUERY_BITS; @@ -38,7 +39,7 @@ * Default implementation of {@link IVFVectorsReader}. It scores the posting lists centroids using * brute force and then scores the top ones using the posting list. */ -public class DefaultIVFVectorsReader extends IVFVectorsReader { +public class DefaultIVFVectorsReader extends IVFVectorsReader implements OffHeapStats { private static final float FOUR_BIT_SCALE = 1f / ((1 << 4) - 1); public DefaultIVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException { @@ -163,57 +164,9 @@ static float int4QuantizedScore( } } - static class OffHeapCentroidFloatVectorValues extends FloatVectorValues { - private final int numCentroids; - private final IndexInput input; - private final int dimension; - private final float[] centroid; - private final long centroidByteSize; - private int ord = -1; - - OffHeapCentroidFloatVectorValues(int numCentroids, IndexInput input, int dimension) { - this.numCentroids = numCentroids; - this.input = input; - this.dimension = dimension; - this.centroid = new float[dimension]; - this.centroidByteSize = dimension + 3 * Float.BYTES + Short.BYTES; - } - - @Override - public float[] vectorValue(int ord) throws IOException { - if (ord < 0 || ord >= numCentroids) { - throw new IllegalArgumentException("ord must be in [0, " + numCentroids + "]"); - } - if (ord == this.ord) { - return centroid; - } - readQuantizedCentroid(ord); - return centroid; - } - - private void readQuantizedCentroid(int centroidOrdinal) throws IOException { - if (centroidOrdinal == ord) { - return; - } - input.seek(numCentroids * centroidByteSize + (long) Float.BYTES * dimension * centroidOrdinal); - input.readFloats(centroid, 0, centroid.length); - ord = centroidOrdinal; - } - - @Override - public int dimension() { - return dimension; - } - - @Override - public int size() { - return numCentroids; - } - - @Override - public FloatVectorValues copy() throws IOException { - return new OffHeapCentroidFloatVectorValues(numCentroids, input.clone(), dimension); - } + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + return Map.of(); } private static class MemorySegmentPostingsVisitor implements PostingVisitor { From 4249eee6feed5c66b3cfe8e826dfa51660c482c1 Mon Sep 17 00:00:00 2001 From: Benjamin Trent <4357155+benwtrent@users.noreply.github.com> Date: Wed, 11 Jun 2025 09:35:17 -0400 Subject: [PATCH 2/2] adding test --- .../codec/vectors/IVFVectorsFormatTests.java | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java index f7eb4cf5241ce..177a3d00c3dc4 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java @@ -13,13 +13,25 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import org.junit.Before; +import java.io.IOException; import java.util.List; import java.util.Locale; @@ -94,4 +106,26 @@ public void testLimits() { expectThrows(IllegalArgumentException.class, () -> new IVFVectorsFormat(MIN_VECTORS_PER_CLUSTER - 1)); expectThrows(IllegalArgumentException.class, () -> new IVFVectorsFormat(MAX_VECTORS_PER_CLUSTER + 1)); } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, VectorSimilarityFunction.EUCLIDEAN)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + assertEquals(0, offHeap.size()); + } + } + } + } }