Skip to content

Commit 8b98f11

Browse files
committed
Rename DiskBBQ format to a versioned name
1 parent 1398524 commit 8b98f11

20 files changed

+138
-85
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
import org.elasticsearch.core.PathUtils;
3131
import org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat;
3232
import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat;
33-
import org.elasticsearch.index.codec.vectors.IVFVectorsFormat;
33+
import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat;
3434
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat;
3535
import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat;
3636
import org.elasticsearch.logging.Level;
@@ -106,7 +106,7 @@ private static String formatIndexPath(CmdLineArgs args) {
106106
static Codec createCodec(CmdLineArgs args) {
107107
final KnnVectorsFormat format;
108108
if (args.indexType() == IndexType.IVF) {
109-
format = new IVFVectorsFormat(args.ivfClusterSize(), IVFVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
109+
format = new ES920DiskBBQVectorsFormat(args.ivfClusterSize(), ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
110110
} else {
111111
if (args.quantizeBits() == 1) {
112112
if (args.indexType() == IndexType.FLAT) {

server/src/main/java/module-info.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10+
import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat;
1011
import org.elasticsearch.plugins.internal.RestExtension;
1112
import org.elasticsearch.reservedstate.ReservedStateHandlerProvider;
1213

@@ -461,7 +462,7 @@
461462
org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat,
462463
org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat,
463464
org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat,
464-
org.elasticsearch.index.codec.vectors.IVFVectorsFormat;
465+
ES920DiskBBQVectorsFormat;
465466

466467
provides org.apache.lucene.codecs.Codec
467468
with
@@ -487,4 +488,6 @@
487488
exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn;
488489
exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn;
489490
exports org.elasticsearch.inference.telemetry;
491+
exports org.elasticsearch.index.codec.vectors.diskbbq to org.elasticsearch.test.knn;
492+
exports org.elasticsearch.index.codec.vectors.cluster to org.elasticsearch.test.knn;
490493
}

server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/KMeansLocal.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import org.apache.lucene.util.FixedBitSet;
1414
import org.apache.lucene.util.VectorUtil;
1515
import org.apache.lucene.util.hnsw.IntToIntFunction;
16-
import org.elasticsearch.index.codec.vectors.SampleReader;
1716
import org.elasticsearch.simdvec.ESVectorUtil;
1817

1918
import java.io.IOException;

server/src/main/java/org/elasticsearch/index/codec/vectors/SampleReader.java renamed to server/src/main/java/org/elasticsearch/index/codec/vectors/cluster/SampleReader.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
110
/*
211
* @notice
312
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,7 +26,7 @@
1726
*
1827
* Modifications copyright (C) 2025 Elasticsearch B.V.
1928
*/
20-
package org.elasticsearch.index.codec.vectors;
29+
package org.elasticsearch.index.codec.vectors.cluster;
2130

2231
import org.apache.lucene.codecs.lucene95.HasIndexSlice;
2332
import org.apache.lucene.index.FloatVectorValues;
@@ -29,7 +38,7 @@
2938
import java.util.Random;
3039
import java.util.function.IntUnaryOperator;
3140

32-
public class SampleReader extends FloatVectorValues implements HasIndexSlice {
41+
class SampleReader extends FloatVectorValues implements HasIndexSlice {
3342
private final FloatVectorValues origin;
3443
private final int sampleSize;
3544
private final IntUnaryOperator sampleFunction;
@@ -81,7 +90,7 @@ public Bits getAcceptOrds(Bits acceptDocs) {
8190
throw new IllegalStateException("Not supported");
8291
}
8392

84-
public static SampleReader createSampleReader(FloatVectorValues origin, int k, long seed) {
93+
static SampleReader createSampleReader(FloatVectorValues origin, int k, long seed) {
8594
// TODO can we do something algorithmically that aligns an ordinal with a unique integer between 0 and numVectors?
8695
if (k >= origin.size()) {
8796
new SampleReader(origin, origin.size(), i -> i);
@@ -101,7 +110,7 @@ public static SampleReader createSampleReader(FloatVectorValues origin, int k, l
101110
* @param seed random seed
102111
* @return array of k samples
103112
*/
104-
public static int[] reservoirSample(int n, int k, long seed) {
113+
static int[] reservoirSample(int n, int k, long seed) {
105114
Random rnd = new Random(seed);
106115
int[] reservoir = new int[k];
107116
for (int i = 0; i < k; i++) {

server/src/main/java/org/elasticsearch/index/codec/vectors/CentroidAssignments.java renamed to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/CentroidAssignments.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
package org.elasticsearch.index.codec.vectors;
10+
package org.elasticsearch.index.codec.vectors.diskbbq;
1111

1212
record CentroidAssignments(int numCentroids, float[][] centroids, int[] assignments, int[] overspillAssignments) {
1313

server/src/main/java/org/elasticsearch/index/codec/vectors/DiskBBQBulkWriter.java renamed to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DiskBBQBulkWriter.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
package org.elasticsearch.index.codec.vectors;
10+
package org.elasticsearch.index.codec.vectors.diskbbq;
1111

1212
import org.apache.lucene.search.CheckedIntConsumer;
1313
import org.apache.lucene.store.IndexOutput;
14+
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
1415

1516
import java.io.IOException;
1617

@@ -28,7 +29,7 @@ protected DiskBBQBulkWriter(int bulkSize, IndexOutput out) {
2829
this.out = out;
2930
}
3031

31-
abstract void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
32+
abstract void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
3233
throws IOException;
3334

3435
static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
@@ -40,7 +41,7 @@ static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
4041
}
4142

4243
@Override
43-
void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
44+
void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
4445
throws IOException {
4546
int limit = qvv.count() - bulkSize + 1;
4647
int i = 0;
@@ -103,7 +104,7 @@ static class SevenBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
103104
}
104105

105106
@Override
106-
void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
107+
void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
107108
throws IOException {
108109
int limit = qvv.count() - bulkSize + 1;
109110
int i = 0;

server/src/main/java/org/elasticsearch/index/codec/vectors/DocIdsWriter.java renamed to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DocIdsWriter.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
110
/*
211
* @notice
312
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -16,7 +25,7 @@
1625
* limitations under the License.
1726
* Modifications copyright (C) 2025 Elasticsearch B.V.
1827
*/
19-
package org.elasticsearch.index.codec.vectors;
28+
package org.elasticsearch.index.codec.vectors.diskbbq;
2029

2130
import org.apache.lucene.index.PointValues.IntersectVisitor;
2231
import org.apache.lucene.store.DataOutput;

server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java renamed to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormat.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
package org.elasticsearch.index.codec.vectors;
10+
package org.elasticsearch.index.codec.vectors.diskbbq;
1111

1212
import org.apache.lucene.codecs.KnnVectorsFormat;
1313
import org.apache.lucene.codecs.KnnVectorsReader;
@@ -17,6 +17,7 @@
1717
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
1818
import org.apache.lucene.index.SegmentReadState;
1919
import org.apache.lucene.index.SegmentWriteState;
20+
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
2021

2122
import java.io.IOException;
2223

@@ -42,9 +43,9 @@
4243
* <p> Stores metadata including the number of centroids and their offsets in the clivf file</p>
4344
*
4445
*/
45-
public class IVFVectorsFormat extends KnnVectorsFormat {
46+
public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat {
4647

47-
public static final String NAME = "IVFVectorsFormat";
48+
public static final String NAME = "ES920DiskBBQVectorsFormat";
4849
// centroid ordinals -> centroid values, offsets
4950
public static final String CENTROID_EXTENSION = "cenivf";
5051
// offsets contained in cen_ivf, [vector ordinals, actually just docIds](long varint), quantized
@@ -72,7 +73,7 @@ public class IVFVectorsFormat extends KnnVectorsFormat {
7273
private final int vectorPerCluster;
7374
private final int centroidsPerParentCluster;
7475

75-
public IVFVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) {
76+
public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) {
7677
super(NAME);
7778
if (vectorPerCluster < MIN_VECTORS_PER_CLUSTER || vectorPerCluster > MAX_VECTORS_PER_CLUSTER) {
7879
throw new IllegalArgumentException(
@@ -99,18 +100,18 @@ public IVFVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) {
99100
}
100101

101102
/** Constructs a format using the default number of vectors per cluster and the default number of centroids per parent cluster. */
102-
public IVFVectorsFormat() {
103+
public ES920DiskBBQVectorsFormat() {
103104
this(DEFAULT_VECTORS_PER_CLUSTER, DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
104105
}
105106

106107
@Override
107108
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
108-
return new DefaultIVFVectorsWriter(state, rawVectorFormat.fieldsWriter(state), vectorPerCluster, centroidsPerParentCluster);
109+
return new ES920DiskBBQVectorsWriter(state, rawVectorFormat.fieldsWriter(state), vectorPerCluster, centroidsPerParentCluster);
109110
}
110111

111112
@Override
112113
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
113-
return new DefaultIVFVectorsReader(state, rawVectorFormat.fieldsReader(state));
114+
return new ES920DiskBBQVectorsReader(state, rawVectorFormat.fieldsReader(state));
114115
}
115116

116117
@Override
@@ -120,7 +121,7 @@ public int getMaxDimensions(String fieldName) {
120121

121122
@Override
122123
public String toString() {
123-
return "IVFVectorsFormat(" + "vectorPerCluster=" + vectorPerCluster + ')';
124+
return "ES920DiskBBQVectorsFormat(" + "vectorPerCluster=" + vectorPerCluster + ')';
124125
}
125126

126127
}

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java renamed to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
package org.elasticsearch.index.codec.vectors;
10+
package org.elasticsearch.index.codec.vectors.diskbbq;
1111

1212
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
1313
import org.apache.lucene.index.FieldInfo;
@@ -18,6 +18,7 @@
1818
import org.apache.lucene.util.ArrayUtil;
1919
import org.apache.lucene.util.Bits;
2020
import org.apache.lucene.util.VectorUtil;
21+
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
2122
import org.elasticsearch.index.codec.vectors.cluster.NeighborQueue;
2223
import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats;
2324
import org.elasticsearch.simdvec.ES91OSQVectorsScorer;
@@ -38,9 +39,9 @@
3839
* Default implementation of {@link IVFVectorsReader}. It scores the posting list centroids using
3940
* brute force and then scores the top ones using the posting list.
4041
*/
41-
public class DefaultIVFVectorsReader extends IVFVectorsReader implements OffHeapStats {
42+
public class ES920DiskBBQVectorsReader extends IVFVectorsReader implements OffHeapStats {
4243

43-
public DefaultIVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException {
44+
public ES920DiskBBQVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException {
4445
super(state, rawVectorsReader);
4546
}
4647

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java renamed to server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsWriter.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
package org.elasticsearch.index.codec.vectors;
10+
package org.elasticsearch.index.codec.vectors.diskbbq;
1111

1212
import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
1313
import org.apache.lucene.index.FieldInfo;
@@ -23,6 +23,8 @@
2323
import org.apache.lucene.util.packed.PackedInts;
2424
import org.apache.lucene.util.packed.PackedLongValues;
2525
import org.elasticsearch.core.SuppressForbidden;
26+
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
27+
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
2628
import org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans;
2729
import org.elasticsearch.index.codec.vectors.cluster.KMeansResult;
2830
import org.elasticsearch.logging.LogManager;
@@ -42,13 +44,13 @@
4244
* partition the vector space, and then stores the centroids and posting list in a sequential
4345
* fashion.
4446
*/
45-
public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
46-
private static final Logger logger = LogManager.getLogger(DefaultIVFVectorsWriter.class);
47+
public class ES920DiskBBQVectorsWriter extends IVFVectorsWriter {
48+
private static final Logger logger = LogManager.getLogger(ES920DiskBBQVectorsWriter.class);
4749

4850
private final int vectorPerCluster;
4951
private final int centroidsPerParentCluster;
5052

51-
public DefaultIVFVectorsWriter(
53+
public ES920DiskBBQVectorsWriter(
5254
SegmentWriteState state,
5355
FlatVectorsWriter rawVectorDelegate,
5456
int vectorPerCluster,

0 commit comments

Comments
 (0)