Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import org.elasticsearch.core.PathUtils;
import org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat;
import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat;
import org.elasticsearch.index.codec.vectors.IVFVectorsFormat;
import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat;
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat;
import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat;
import org.elasticsearch.logging.Level;
Expand Down Expand Up @@ -106,7 +106,7 @@ private static String formatIndexPath(CmdLineArgs args) {
static Codec createCodec(CmdLineArgs args) {
final KnnVectorsFormat format;
if (args.indexType() == IndexType.IVF) {
format = new IVFVectorsFormat(args.ivfClusterSize(), IVFVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
format = new ES920DiskBBQVectorsFormat(args.ivfClusterSize(), ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
} else {
if (args.quantizeBits() == 1) {
if (args.indexType() == IndexType.FLAT) {
Expand Down
5 changes: 4 additions & 1 deletion server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat;
import org.elasticsearch.plugins.internal.RestExtension;
import org.elasticsearch.reservedstate.ReservedStateHandlerProvider;

Expand Down Expand Up @@ -461,7 +462,7 @@
org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.IVFVectorsFormat;
ES920DiskBBQVectorsFormat;

provides org.apache.lucene.codecs.Codec
with
Expand All @@ -487,4 +488,6 @@
exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn;
exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn;
exports org.elasticsearch.inference.telemetry;
exports org.elasticsearch.index.codec.vectors.diskbbq to org.elasticsearch.test.knn;
exports org.elasticsearch.index.codec.vectors.cluster to org.elasticsearch.test.knn;
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.hnsw.IntToIntFunction;
import org.elasticsearch.index.codec.vectors.SampleReader;
import org.elasticsearch.simdvec.ESVectorUtil;

import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*
* Modifications copyright (C) 2025 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.cluster;

import org.apache.lucene.codecs.lucene95.HasIndexSlice;
import org.apache.lucene.index.FloatVectorValues;
Expand All @@ -29,7 +29,7 @@
import java.util.Random;
import java.util.function.IntUnaryOperator;

public class SampleReader extends FloatVectorValues implements HasIndexSlice {
class SampleReader extends FloatVectorValues implements HasIndexSlice {
private final FloatVectorValues origin;
private final int sampleSize;
private final IntUnaryOperator sampleFunction;
Expand Down Expand Up @@ -81,7 +81,7 @@ public Bits getAcceptOrds(Bits acceptDocs) {
throw new IllegalStateException("Not supported");
}

public static SampleReader createSampleReader(FloatVectorValues origin, int k, long seed) {
static SampleReader createSampleReader(FloatVectorValues origin, int k, long seed) {
// TODO can we do something algorithmically that aligns an ordinal with a unique integer between 0 and numVectors?
if (k >= origin.size()) {
new SampleReader(origin, origin.size(), i -> i);
Expand All @@ -101,7 +101,7 @@ public static SampleReader createSampleReader(FloatVectorValues origin, int k, l
* @param seed random seed
* @return array of k samples
*/
public static int[] reservoirSample(int n, int k, long seed) {
static int[] reservoirSample(int n, int k, long seed) {
Random rnd = new Random(seed);
int[] reservoir = new int[k];
for (int i = 0; i < k; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.diskbbq;

record CentroidAssignments(int numCentroids, float[][] centroids, int[] assignments, int[] overspillAssignments) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.diskbbq;

import org.apache.lucene.search.CheckedIntConsumer;
import org.apache.lucene.store.IndexOutput;
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;

import java.io.IOException;

Expand All @@ -28,7 +29,7 @@ protected DiskBBQBulkWriter(int bulkSize, IndexOutput out) {
this.out = out;
}

abstract void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
abstract void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
throws IOException;

static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
Expand All @@ -40,7 +41,7 @@ static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
}

@Override
void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
throws IOException {
int limit = qvv.count() - bulkSize + 1;
int i = 0;
Expand Down Expand Up @@ -103,7 +104,7 @@ static class SevenBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
}

@Override
void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
throws IOException {
int limit = qvv.count() - bulkSize + 1;
int i = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* limitations under the License.
* Modifications copyright (C) 2025 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.diskbbq;

import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.store.DataOutput;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.diskbbq;

import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
Expand All @@ -17,6 +17,7 @@
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;

import java.io.IOException;

Expand All @@ -42,9 +43,9 @@
* <p> Stores metadata including the number of centroids and their offsets in the clivf file</p>
*
*/
public class IVFVectorsFormat extends KnnVectorsFormat {
public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat {

public static final String NAME = "IVFVectorsFormat";
public static final String NAME = "ES920DiskBBQVectorsFormat";
// centroid ordinals -> centroid values, offsets
public static final String CENTROID_EXTENSION = "cenivf";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to say it out loud: the file extensions will still have 'ivf' in them. This seems fine, and I can't think of a more elegant naming convention.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is ok. File name extensions are always, ugh.

// offsets contained in cen_ivf, [vector ordinals, actually just docIds](long varint), quantized
Expand Down Expand Up @@ -72,7 +73,7 @@ public class IVFVectorsFormat extends KnnVectorsFormat {
private final int vectorPerCluster;
private final int centroidsPerParentCluster;

public IVFVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) {
public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) {
super(NAME);
if (vectorPerCluster < MIN_VECTORS_PER_CLUSTER || vectorPerCluster > MAX_VECTORS_PER_CLUSTER) {
throw new IllegalArgumentException(
Expand All @@ -99,18 +100,18 @@ public IVFVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) {
}

/** Constructs a format using the default number of vectors per cluster and the default number of centroids per parent cluster. */
public IVFVectorsFormat() {
public ES920DiskBBQVectorsFormat() {
this(DEFAULT_VECTORS_PER_CLUSTER, DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
}

@Override
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new DefaultIVFVectorsWriter(state, rawVectorFormat.fieldsWriter(state), vectorPerCluster, centroidsPerParentCluster);
return new ES920DiskBBQVectorsWriter(state, rawVectorFormat.fieldsWriter(state), vectorPerCluster, centroidsPerParentCluster);
}

@Override
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
return new DefaultIVFVectorsReader(state, rawVectorFormat.fieldsReader(state));
return new ES920DiskBBQVectorsReader(state, rawVectorFormat.fieldsReader(state));
}

@Override
Expand All @@ -120,7 +121,7 @@ public int getMaxDimensions(String fieldName) {

@Override
public String toString() {
return "IVFVectorsFormat(" + "vectorPerCluster=" + vectorPerCluster + ')';
return "ES920DiskBBQVectorsFormat(" + "vectorPerCluster=" + vectorPerCluster + ')';
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.diskbbq;

import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
import org.apache.lucene.index.FieldInfo;
Expand All @@ -18,6 +18,7 @@
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.VectorUtil;
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
import org.elasticsearch.index.codec.vectors.cluster.NeighborQueue;
import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats;
import org.elasticsearch.simdvec.ES91OSQVectorsScorer;
Expand All @@ -38,9 +39,9 @@
* Default implementation of {@link IVFVectorsReader}. It scores the posting list centroids using
* brute force and then scores the top ones using the posting list.
*/
public class DefaultIVFVectorsReader extends IVFVectorsReader implements OffHeapStats {
public class ES920DiskBBQVectorsReader extends IVFVectorsReader implements OffHeapStats {

public DefaultIVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException {
public ES920DiskBBQVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException {
super(state, rawVectorsReader);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.diskbbq;

import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
import org.apache.lucene.index.FieldInfo;
Expand All @@ -23,6 +23,8 @@
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
import org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans;
import org.elasticsearch.index.codec.vectors.cluster.KMeansResult;
import org.elasticsearch.logging.LogManager;
Expand All @@ -42,13 +44,13 @@
* partition the vector space, and then stores the centroids and posting list in a sequential
* fashion.
*/
public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
private static final Logger logger = LogManager.getLogger(DefaultIVFVectorsWriter.class);
public class ES920DiskBBQVectorsWriter extends IVFVectorsWriter {
private static final Logger logger = LogManager.getLogger(ES920DiskBBQVectorsWriter.class);

private final int vectorPerCluster;
private final int centroidsPerParentCluster;

public DefaultIVFVectorsWriter(
public ES920DiskBBQVectorsWriter(
SegmentWriteState state,
FlatVectorsWriter rawVectorDelegate,
int vectorPerCluster,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.diskbbq;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.KnnVectorsReader;
Expand Down Expand Up @@ -35,7 +35,7 @@
import java.io.IOException;

import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS;
import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_VISIT_RATIO;
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.DYNAMIC_VISIT_RATIO;

/**
* Reader for IVF vectors. This reader is used to read the IVF vectors from the index.
Expand All @@ -54,7 +54,11 @@ protected IVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsR
this.fieldInfos = state.fieldInfos;
this.rawVectorsReader = rawVectorsReader;
this.fields = new IntObjectHashMap<>();
String meta = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, IVFVectorsFormat.IVF_META_EXTENSION);
String meta = IndexFileNames.segmentFileName(
state.segmentInfo.name,
state.segmentSuffix,
ES920DiskBBQVectorsFormat.IVF_META_EXTENSION
);

int versionMeta = -1;
boolean success = false;
Expand All @@ -63,9 +67,9 @@ protected IVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsR
try {
versionMeta = CodecUtil.checkIndexHeader(
ivfMeta,
IVFVectorsFormat.NAME,
IVFVectorsFormat.VERSION_START,
IVFVectorsFormat.VERSION_CURRENT,
ES920DiskBBQVectorsFormat.NAME,
ES920DiskBBQVectorsFormat.VERSION_START,
ES920DiskBBQVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix
);
Expand All @@ -75,8 +79,20 @@ protected IVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsR
} finally {
CodecUtil.checkFooter(ivfMeta, priorE);
}
ivfCentroids = openDataInput(state, versionMeta, IVFVectorsFormat.CENTROID_EXTENSION, IVFVectorsFormat.NAME, state.context);
ivfClusters = openDataInput(state, versionMeta, IVFVectorsFormat.CLUSTER_EXTENSION, IVFVectorsFormat.NAME, state.context);
ivfCentroids = openDataInput(
state,
versionMeta,
ES920DiskBBQVectorsFormat.CENTROID_EXTENSION,
ES920DiskBBQVectorsFormat.NAME,
state.context
);
ivfClusters = openDataInput(
state,
versionMeta,
ES920DiskBBQVectorsFormat.CLUSTER_EXTENSION,
ES920DiskBBQVectorsFormat.NAME,
state.context
);
success = true;
} finally {
if (success == false) {
Expand Down Expand Up @@ -108,8 +124,8 @@ private static IndexInput openDataInput(
final int versionVectorData = CodecUtil.checkIndexHeader(
in,
codecName,
IVFVectorsFormat.VERSION_START,
IVFVectorsFormat.VERSION_CURRENT,
ES920DiskBBQVectorsFormat.VERSION_START,
ES920DiskBBQVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix
);
Expand Down
Loading