diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java index ac4d1f948e4df..9de3b7fa15b6f 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java @@ -30,7 +30,7 @@ import org.elasticsearch.core.PathUtils; import org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat; import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat; -import org.elasticsearch.index.codec.vectors.IVFVectorsFormat; +import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat; import org.elasticsearch.logging.Level; @@ -106,7 +106,7 @@ private static String formatIndexPath(CmdLineArgs args) { static Codec createCodec(CmdLineArgs args) { final KnnVectorsFormat format; if (args.indexType() == IndexType.IVF) { - format = new IVFVectorsFormat(args.ivfClusterSize(), IVFVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER); + format = new ES920DiskBBQVectorsFormat(args.ivfClusterSize(), ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER); } else { if (args.quantizeBits() == 1) { if (args.indexType() == IndexType.FLAT) { diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 549c603b13980..470ca69fb0d68 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -7,6 +7,7 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ +import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat; import org.elasticsearch.plugins.internal.RestExtension; import org.elasticsearch.reservedstate.ReservedStateHandlerProvider; @@ -461,7 +462,7 @@ org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat, - org.elasticsearch.index.codec.vectors.IVFVectorsFormat; + ES920DiskBBQVectorsFormat; provides org.apache.lucene.codecs.Codec with @@ -487,4 +488,6 @@ exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn; exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn; exports org.elasticsearch.inference.telemetry; + exports org.elasticsearch.index.codec.vectors.diskbbq to org.elasticsearch.test.knn; + exports org.elasticsearch.index.codec.vectors.cluster to org.elasticsearch.test.knn; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java index 0e5ce3d211737..6fd0687dd2ffc 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java @@ -13,7 +13,6 @@ import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.hnsw.IntToIntFunction; -import org.elasticsearch.index.codec.vectors.SampleReader; import org.elasticsearch.simdvec.ESVectorUtil; import java.io.IOException; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/SampleReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/SampleReader.java similarity index 92% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/SampleReader.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/SampleReader.java index f2d7944f1088c..2295468b38758 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/SampleReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/SampleReader.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2025 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.cluster; import org.apache.lucene.codecs.lucene95.HasIndexSlice; import org.apache.lucene.index.FloatVectorValues; @@ -29,7 +29,7 @@ import java.util.Random; import java.util.function.IntUnaryOperator; -public class SampleReader extends FloatVectorValues implements HasIndexSlice { +class SampleReader extends FloatVectorValues implements HasIndexSlice { private final FloatVectorValues origin; private final int sampleSize; private final IntUnaryOperator sampleFunction; @@ -81,7 +81,7 @@ public Bits getAcceptOrds(Bits acceptDocs) { throw new IllegalStateException("Not supported"); } - public static SampleReader createSampleReader(FloatVectorValues origin, int k, long seed) { + static SampleReader createSampleReader(FloatVectorValues origin, int k, long seed) { // TODO can we do something algorithmically that aligns an ordinal with a unique integer between 0 and numVectors? if (k >= origin.size()) { new SampleReader(origin, origin.size(), i -> i); @@ -101,7 +101,7 @@ public static SampleReader createSampleReader(FloatVectorValues origin, int k, l * @param seed random seed * @return array of k samples */ - public static int[] reservoirSample(int n, int k, long seed) { + static int[] reservoirSample(int n, int k, long seed) { Random rnd = new Random(seed); int[] reservoir = new int[k]; for (int i = 0; i < k; i++) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/CentroidAssignments.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/CentroidAssignments.java similarity index 94% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/CentroidAssignments.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/CentroidAssignments.java index e92ece41077a6..aa1e80a3b28ee 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/CentroidAssignments.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/CentroidAssignments.java @@ -7,7 +7,7 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.diskbbq; record CentroidAssignments(int numCentroids, float[][] centroids, int[] assignments, int[] overspillAssignments) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DiskBBQBulkWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DiskBBQBulkWriter.java similarity index 92% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/DiskBBQBulkWriter.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DiskBBQBulkWriter.java index 4c1cc27286a63..793720ac8182f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DiskBBQBulkWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DiskBBQBulkWriter.java @@ -7,10 +7,11 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.diskbbq; import org.apache.lucene.search.CheckedIntConsumer; import org.apache.lucene.store.IndexOutput; +import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; import java.io.IOException; @@ -28,7 +29,7 @@ protected DiskBBQBulkWriter(int bulkSize, IndexOutput out) { this.out = out; } - abstract void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer docsWriter) + abstract void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer docsWriter) throws IOException; static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter { @@ -40,7 +41,7 @@ static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter { } @Override - void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer docsWriter) + void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer docsWriter) throws IOException { int limit = qvv.count() - bulkSize + 1; int i = 0; @@ -103,7 +104,7 @@ static class SevenBitDiskBBQBulkWriter extends DiskBBQBulkWriter { } @Override - void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer docsWriter) + void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer docsWriter) throws IOException { int limit = qvv.count() - bulkSize + 1; int i = 0; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DocIdsWriter.java similarity index 99% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DocIdsWriter.java index c6604bb3427da..a3984d1aa1b34 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DocIdsWriter.java @@ -16,7 +16,7 @@ * limitations under the License. * Modifications copyright (C) 2025 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.diskbbq; import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.store.DataOutput; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormat.java similarity index 87% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormat.java index 73cf4adb804ba..f7d804c428830 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormat.java @@ -7,7 +7,7 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.diskbbq; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; @@ -17,6 +17,7 @@ import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; import java.io.IOException; @@ -42,9 +43,9 @@ *

Stores metadata including the number of centroids and their offsets in the clivf file

* */ -public class IVFVectorsFormat extends KnnVectorsFormat { +public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat { - public static final String NAME = "IVFVectorsFormat"; + public static final String NAME = "ES920DiskBBQVectorsFormat"; // centroid ordinals -> centroid values, offsets public static final String CENTROID_EXTENSION = "cenivf"; // offsets contained in cen_ivf, [vector ordinals, actually just docIds](long varint), quantized @@ -72,7 +73,7 @@ public class IVFVectorsFormat extends KnnVectorsFormat { private final int vectorPerCluster; private final int centroidsPerParentCluster; - public IVFVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) { + public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) { super(NAME); if (vectorPerCluster < MIN_VECTORS_PER_CLUSTER || vectorPerCluster > MAX_VECTORS_PER_CLUSTER) { throw new IllegalArgumentException( @@ -99,18 +100,18 @@ public IVFVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) { } /** Constructs a format using the given graph construction parameters and scalar quantization. */ - public IVFVectorsFormat() { + public ES920DiskBBQVectorsFormat() { this(DEFAULT_VECTORS_PER_CLUSTER, DEFAULT_CENTROIDS_PER_PARENT_CLUSTER); } @Override public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new DefaultIVFVectorsWriter(state, rawVectorFormat.fieldsWriter(state), vectorPerCluster, centroidsPerParentCluster); + return new ES920DiskBBQVectorsWriter(state, rawVectorFormat.fieldsWriter(state), vectorPerCluster, centroidsPerParentCluster); } @Override public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { - return new DefaultIVFVectorsReader(state, rawVectorFormat.fieldsReader(state)); + return new ES920DiskBBQVectorsReader(state, rawVectorFormat.fieldsReader(state)); } @Override @@ -120,7 +121,7 @@ public int getMaxDimensions(String fieldName) { @Override public String toString() { - return "IVFVectorsFormat(" + "vectorPerCluster=" + vectorPerCluster + ')'; + return "ES920DiskBBQVectorsFormat(" + "vectorPerCluster=" + vectorPerCluster + ')'; } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java index 7bba3d94a7e96..dd06bdba7f6a1 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java @@ -7,7 +7,7 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.diskbbq; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; import org.apache.lucene.index.FieldInfo; @@ -18,6 +18,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; import org.elasticsearch.index.codec.vectors.cluster.NeighborQueue; import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import org.elasticsearch.simdvec.ES91OSQVectorsScorer; @@ -38,9 +39,9 @@ * Default implementation of {@link IVFVectorsReader}. It scores the posting lists centroids using * brute force and then scores the top ones using the posting list. */ -public class DefaultIVFVectorsReader extends IVFVectorsReader implements OffHeapStats { +public class ES920DiskBBQVectorsReader extends IVFVectorsReader implements OffHeapStats { - public DefaultIVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException { + public ES920DiskBBQVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException { super(state, rawVectorsReader); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsWriter.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsWriter.java index 91e4d29690660..f27000bf65de5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsWriter.java @@ -7,7 +7,7 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.diskbbq; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.index.FieldInfo; @@ -23,6 +23,8 @@ import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; +import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; import org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans; import org.elasticsearch.index.codec.vectors.cluster.KMeansResult; import org.elasticsearch.logging.LogManager; @@ -42,13 +44,13 @@ * partition the vector space, and then stores the centroids and posting list in a sequential * fashion. */ -public class DefaultIVFVectorsWriter extends IVFVectorsWriter { - private static final Logger logger = LogManager.getLogger(DefaultIVFVectorsWriter.class); +public class ES920DiskBBQVectorsWriter extends IVFVectorsWriter { + private static final Logger logger = LogManager.getLogger(ES920DiskBBQVectorsWriter.class); private final int vectorPerCluster; private final int centroidsPerParentCluster; - public DefaultIVFVectorsWriter( + public ES920DiskBBQVectorsWriter( SegmentWriteState state, FlatVectorsWriter rawVectorDelegate, int vectorPerCluster, diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java similarity index 92% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java index a2914682ac93f..dc531c7ca8e56 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java @@ -7,7 +7,7 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.diskbbq; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; @@ -35,7 +35,7 @@ import java.io.IOException; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_VISIT_RATIO; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.DYNAMIC_VISIT_RATIO; /** * Reader for IVF vectors. This reader is used to read the IVF vectors from the index. @@ -54,7 +54,11 @@ protected IVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsR this.fieldInfos = state.fieldInfos; this.rawVectorsReader = rawVectorsReader; this.fields = new IntObjectHashMap<>(); - String meta = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, IVFVectorsFormat.IVF_META_EXTENSION); + String meta = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + ES920DiskBBQVectorsFormat.IVF_META_EXTENSION + ); int versionMeta = -1; boolean success = false; @@ -63,9 +67,9 @@ protected IVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsR try { versionMeta = CodecUtil.checkIndexHeader( ivfMeta, - IVFVectorsFormat.NAME, - IVFVectorsFormat.VERSION_START, - IVFVectorsFormat.VERSION_CURRENT, + ES920DiskBBQVectorsFormat.NAME, + ES920DiskBBQVectorsFormat.VERSION_START, + ES920DiskBBQVectorsFormat.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix ); @@ -75,8 +79,20 @@ protected IVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsR } finally { CodecUtil.checkFooter(ivfMeta, priorE); } - ivfCentroids = openDataInput(state, versionMeta, IVFVectorsFormat.CENTROID_EXTENSION, IVFVectorsFormat.NAME, state.context); - ivfClusters = openDataInput(state, versionMeta, IVFVectorsFormat.CLUSTER_EXTENSION, IVFVectorsFormat.NAME, state.context); + ivfCentroids = openDataInput( + state, + versionMeta, + ES920DiskBBQVectorsFormat.CENTROID_EXTENSION, + ES920DiskBBQVectorsFormat.NAME, + state.context + ); + ivfClusters = openDataInput( + state, + versionMeta, + ES920DiskBBQVectorsFormat.CLUSTER_EXTENSION, + ES920DiskBBQVectorsFormat.NAME, + state.context + ); success = true; } finally { if (success == false) { @@ -108,8 +124,8 @@ private static IndexInput openDataInput( final int versionVectorData = CodecUtil.checkIndexHeader( in, codecName, - IVFVectorsFormat.VERSION_START, - IVFVectorsFormat.VERSION_CURRENT, + ES920DiskBBQVectorsFormat.VERSION_START, + ES920DiskBBQVectorsFormat.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix ); diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsWriter.java similarity index 97% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsWriter.java index 2e1f33c525f77..26b1e12991bd5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsWriter.java @@ -7,7 +7,7 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.diskbbq; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnFieldVectorsWriter; @@ -59,42 +59,42 @@ protected IVFVectorsWriter(SegmentWriteState state, FlatVectorsWriter rawVectorD final String metaFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, - IVFVectorsFormat.IVF_META_EXTENSION + ES920DiskBBQVectorsFormat.IVF_META_EXTENSION ); final String ivfCentroidsFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, - IVFVectorsFormat.CENTROID_EXTENSION + ES920DiskBBQVectorsFormat.CENTROID_EXTENSION ); final String ivfClustersFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, - IVFVectorsFormat.CLUSTER_EXTENSION + ES920DiskBBQVectorsFormat.CLUSTER_EXTENSION ); boolean success = false; try { ivfMeta = state.directory.createOutput(metaFileName, state.context); CodecUtil.writeIndexHeader( ivfMeta, - IVFVectorsFormat.NAME, - IVFVectorsFormat.VERSION_CURRENT, + ES920DiskBBQVectorsFormat.NAME, + ES920DiskBBQVectorsFormat.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix ); ivfCentroids = state.directory.createOutput(ivfCentroidsFileName, state.context); CodecUtil.writeIndexHeader( ivfCentroids, - IVFVectorsFormat.NAME, - IVFVectorsFormat.VERSION_CURRENT, + ES920DiskBBQVectorsFormat.NAME, + ES920DiskBBQVectorsFormat.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix ); ivfClusters = state.directory.createOutput(ivfClustersFileName, state.context); CodecUtil.writeIndexHeader( ivfClusters, - IVFVectorsFormat.NAME, - IVFVectorsFormat.VERSION_CURRENT, + ES920DiskBBQVectorsFormat.NAME, + ES920DiskBBQVectorsFormat.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix ); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index c1ff919c1f014..3f1e731b0cf87 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -55,7 +55,7 @@ import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat; import org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat; -import org.elasticsearch.index.codec.vectors.IVFVectorsFormat; +import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat; import org.elasticsearch.index.fielddata.FieldDataContext; @@ -118,8 +118,8 @@ import static org.elasticsearch.common.Strings.format; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.elasticsearch.index.IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MIN_VECTORS_PER_CLUSTER; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MAX_VECTORS_PER_CLUSTER; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER; /** * A {@link FieldMapper} for indexing a dense vector of floats. @@ -1552,7 +1552,7 @@ public boolean supportsDimension(int dims) { @Override public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { Object clusterSizeNode = indexOptionsMap.remove("cluster_size"); - int clusterSize = IVFVectorsFormat.DEFAULT_VECTORS_PER_CLUSTER; + int clusterSize = ES920DiskBBQVectorsFormat.DEFAULT_VECTORS_PER_CLUSTER; if (clusterSizeNode != null) { clusterSize = XContentMapValues.nodeIntegerValue(clusterSizeNode); if (clusterSize < MIN_VECTORS_PER_CLUSTER || clusterSize > MAX_VECTORS_PER_CLUSTER) { @@ -2189,7 +2189,7 @@ static class BBQIVFIndexOptions extends QuantizedIndexOptions { @Override KnnVectorsFormat getVectorsFormat(ElementType elementType) { assert elementType == ElementType.FLOAT; - return new IVFVectorsFormat(clusterSize, IVFVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER); + return new ES920DiskBBQVectorsFormat(clusterSize, ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER); } @Override diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat index 14e68029abc3b..ff02849e96c46 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -7,4 +7,4 @@ org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat -org.elasticsearch.index.codec.vectors.IVFVectorsFormat +org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/DocIdsWriterTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/DocIdsWriterTests.java similarity index 98% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/DocIdsWriterTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/DocIdsWriterTests.java index 9235823777b01..ec4eace740ea2 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/DocIdsWriterTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/DocIdsWriterTests.java @@ -17,7 +17,7 @@ * Modifications copyright (C) 2025 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.diskbbq; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.SortedNumericDocValuesField; @@ -36,7 +36,7 @@ import java.util.List; import java.util.Set; -import static org.elasticsearch.index.codec.vectors.DocIdsWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE; +import static org.elasticsearch.index.codec.vectors.diskbbq.DocIdsWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE; public class DocIdsWriterTests extends LuceneTestCase { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormatTests.java similarity index 86% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormatTests.java index 08693bf524691..eaf6b66c286d1 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormatTests.java @@ -6,7 +6,7 @@ * your election, the "Elastic License 2.0", the "GNU Affero General Public * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.diskbbq; import com.carrotsearch.randomizedtesting.generators.RandomPicks; @@ -39,14 +39,14 @@ import java.util.concurrent.atomic.AtomicBoolean; import static java.lang.String.format; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MAX_CENTROIDS_PER_PARENT_CLUSTER; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MIN_VECTORS_PER_CLUSTER; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MAX_CENTROIDS_PER_PARENT_CLUSTER; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MAX_VECTORS_PER_CLUSTER; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.oneOf; -public class IVFVectorsFormatTests extends BaseKnnVectorsFormatTestCase { +public class ES920DiskBBQVectorsFormatTests extends BaseKnnVectorsFormatTestCase { static { LogConfigurator.loadLog4jPlugins(); @@ -58,13 +58,13 @@ public class IVFVectorsFormatTests extends BaseKnnVectorsFormatTestCase { @Override public void setUp() throws Exception { if (rarely()) { - format = new IVFVectorsFormat( - random().nextInt(2 * MIN_VECTORS_PER_CLUSTER, IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER), - random().nextInt(8, IVFVectorsFormat.MAX_CENTROIDS_PER_PARENT_CLUSTER) + format = new ES920DiskBBQVectorsFormat( + random().nextInt(2 * MIN_VECTORS_PER_CLUSTER, ES920DiskBBQVectorsFormat.MAX_VECTORS_PER_CLUSTER), + random().nextInt(8, ES920DiskBBQVectorsFormat.MAX_CENTROIDS_PER_PARENT_CLUSTER) ); } else { // run with low numbers to force many clusters with parents - format = new IVFVectorsFormat( + format = new ES920DiskBBQVectorsFormat( random().nextInt(MIN_VECTORS_PER_CLUSTER, 2 * MIN_VECTORS_PER_CLUSTER), random().nextInt(MIN_CENTROIDS_PER_PARENT_CLUSTER, 8) ); @@ -108,10 +108,10 @@ public void testToString() { FilterCodec customCodec = new FilterCodec("foo", Codec.getDefault()) { @Override public KnnVectorsFormat knnVectorsFormat() { - return new IVFVectorsFormat(128, 4); + return new ES920DiskBBQVectorsFormat(128, 4); } }; - String expectedPattern = "IVFVectorsFormat(vectorPerCluster=128)"; + String expectedPattern = "ES920DiskBBQVectorsFormat(vectorPerCluster=128)"; var defaultScorer = format(Locale.ROOT, expectedPattern, "DefaultFlatVectorScorer"); var memSegScorer = format(Locale.ROOT, expectedPattern, "Lucene99MemorySegmentFlatVectorsScorer"); @@ -119,10 +119,10 @@ public KnnVectorsFormat knnVectorsFormat() { } public void testLimits() { - expectThrows(IllegalArgumentException.class, () -> new IVFVectorsFormat(MIN_VECTORS_PER_CLUSTER - 1, 16)); - expectThrows(IllegalArgumentException.class, () -> new IVFVectorsFormat(MAX_VECTORS_PER_CLUSTER + 1, 16)); - expectThrows(IllegalArgumentException.class, () -> new IVFVectorsFormat(128, MIN_CENTROIDS_PER_PARENT_CLUSTER - 1)); - expectThrows(IllegalArgumentException.class, () -> new IVFVectorsFormat(128, MAX_CENTROIDS_PER_PARENT_CLUSTER + 1)); + expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(MIN_VECTORS_PER_CLUSTER - 1, 16)); + expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(MAX_VECTORS_PER_CLUSTER + 1, 16)); + expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(128, MIN_CENTROIDS_PER_PARENT_CLUSTER - 1)); + expectThrows(IllegalArgumentException.class, () -> new ES920DiskBBQVectorsFormat(128, MAX_CENTROIDS_PER_PARENT_CLUSTER + 1)); } public void testSimpleOffHeapSize() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 11ffe2f9dc789..c69dc264eb076 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -31,7 +31,7 @@ import org.elasticsearch.index.codec.CodecService; import org.elasticsearch.index.codec.LegacyPerFieldMapperCodec; import org.elasticsearch.index.codec.PerFieldMapperCodec; -import org.elasticsearch.index.codec.vectors.IVFVectorsFormat; +import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.index.mapper.LuceneDocument; @@ -65,7 +65,7 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; import static org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase.randomNormalizedVector; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_VISIT_RATIO; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.DYNAMIC_VISIT_RATIO; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; @@ -1514,7 +1514,7 @@ public void testIVFParsing() throws IOException { .fieldType() .getIndexOptions(); assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F); - assertEquals(IVFVectorsFormat.DEFAULT_VECTORS_PER_CLUSTER, indexOptions.clusterSize); + assertEquals(ES920DiskBBQVectorsFormat.DEFAULT_VECTORS_PER_CLUSTER, indexOptions.clusterSize); assertEquals(DYNAMIC_VISIT_RATIO, indexOptions.defaultVisitPercentage, 0.0); } { @@ -2964,7 +2964,7 @@ public void testKnnBBQIVFVectorsFormat() throws IOException { assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } - String expectedString = "IVFVectorsFormat(vectorPerCluster=384)"; + String expectedString = "ES920DiskBBQVectorsFormat(vectorPerCluster=384)"; assertEquals(expectedString, knnVectorsFormat.toString()); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java index d6206b845ae25..816551102beca 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java @@ -41,8 +41,8 @@ import java.util.Set; import java.util.function.Function; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MIN_VECTORS_PER_CLUSTER; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MAX_VECTORS_PER_CLUSTER; +import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.BBQ_MIN_DIMS; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.ElementType.BIT; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.ElementType.BYTE; diff --git a/server/src/test/java/org/elasticsearch/search/vectors/AbstractDiversifyingChildrenIVFKnnVectorQueryTestCase.java b/server/src/test/java/org/elasticsearch/search/vectors/AbstractDiversifyingChildrenIVFKnnVectorQueryTestCase.java index ce08d631399d6..8e27cca564a40 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/AbstractDiversifyingChildrenIVFKnnVectorQueryTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/AbstractDiversifyingChildrenIVFKnnVectorQueryTestCase.java @@ -47,7 +47,7 @@ import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; -import org.elasticsearch.index.codec.vectors.IVFVectorsFormat; +import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat; import org.junit.Before; import java.io.IOException; @@ -93,9 +93,12 @@ static Document makeParent(int[] children) { @Before public void setUp() throws Exception { super.setUp(); - format = new IVFVectorsFormat( - random().nextInt(IVFVectorsFormat.MIN_VECTORS_PER_CLUSTER, IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER), - random().nextInt(IVFVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER, IVFVectorsFormat.MAX_CENTROIDS_PER_PARENT_CLUSTER) + format = new ES920DiskBBQVectorsFormat( + random().nextInt(ES920DiskBBQVectorsFormat.MIN_VECTORS_PER_CLUSTER, ES920DiskBBQVectorsFormat.MAX_VECTORS_PER_CLUSTER), + random().nextInt( + ES920DiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER, + ES920DiskBBQVectorsFormat.MAX_CENTROIDS_PER_PARENT_CLUSTER + ) ); } diff --git a/server/src/test/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQueryTestCase.java b/server/src/test/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQueryTestCase.java index 71583ce813154..e94f0dc915802 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQueryTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQueryTestCase.java @@ -71,7 +71,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.logging.LogConfigurator; -import org.elasticsearch.index.codec.vectors.IVFVectorsFormat; +import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat; import org.junit.Before; import java.io.IOException; @@ -98,7 +98,7 @@ abstract class AbstractIVFKnnVectorQueryTestCase extends LuceneTestCase { @Before public void setUp() throws Exception { super.setUp(); - format = new IVFVectorsFormat(128, 4); + format = new ES920DiskBBQVectorsFormat(128, 4); } abstract AbstractIVFKnnVectorQuery getKnnVectorQuery(String field, float[] query, int k, Query queryFilter, float visitRatio);