Skip to content

Commit 41a2a97

Browse files
authored
Adding a new development format for diskbbq (#135859)
This does a wholesale copy of the diskbbq versioned format into a separate module for future development. Nothing new is added; things are just moved around so we can modify the "next" format in preparation for upcoming work.
1 parent 5c037f4 commit 41a2a97

17 files changed

+1845
-43
lines changed

server/src/main/java/module-info.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat;
1110
import org.elasticsearch.plugins.internal.RestExtension;
1211
import org.elasticsearch.reservedstate.ReservedStateHandlerProvider;
1312

@@ -463,7 +462,8 @@
463462
org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat,
464463
org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat,
465464
org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat,
466-
ES920DiskBBQVectorsFormat;
465+
org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat,
466+
org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat;
467467

468468
provides org.apache.lucene.codecs.Codec
469469
with

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/CentroidAssignments.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99

1010
package org.elasticsearch.index.codec.vectors.diskbbq;
1111

12-
record CentroidAssignments(int numCentroids, float[][] centroids, int[] assignments, int[] overspillAssignments) {
12+
public record CentroidAssignments(int numCentroids, float[][] centroids, int[] assignments, int[] overspillAssignments) {
1313

14-
CentroidAssignments(float[][] centroids, int[] assignments, int[] overspillAssignments) {
14+
public CentroidAssignments(float[][] centroids, int[] assignments, int[] overspillAssignments) {
1515
this(centroids.length, centroids, assignments, overspillAssignments);
1616
assert assignments.length == overspillAssignments.length || overspillAssignments.length == 0
1717
: "assignments and overspillAssignments must have the same length";

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/CentroidSupplier.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
/**
1515
* An interface for suppliers of centroids.
1616
*/
17-
interface CentroidSupplier {
17+
public interface CentroidSupplier {
1818
CentroidSupplier EMPTY = new CentroidSupplier() {
1919
@Override
2020
public int size() {

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DiskBBQBulkWriter.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
* This class provides the structure for writing vectors in bulk, with specific
2121
* implementations for different bit sizes strategies.
2222
*/
23-
abstract class DiskBBQBulkWriter {
23+
public abstract class DiskBBQBulkWriter {
2424
protected final int bulkSize;
2525
protected final IndexOutput out;
2626

@@ -29,18 +29,18 @@ protected DiskBBQBulkWriter(int bulkSize, IndexOutput out) {
2929
this.out = out;
3030
}
3131

32-
abstract void writeVectors(QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter) throws IOException;
32+
public abstract void writeVectors(QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter) throws IOException;
3333

34-
static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
34+
public static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
3535
private final OptimizedScalarQuantizer.QuantizationResult[] corrections;
3636

37-
OneBitDiskBBQBulkWriter(int bulkSize, IndexOutput out) {
37+
public OneBitDiskBBQBulkWriter(int bulkSize, IndexOutput out) {
3838
super(bulkSize, out);
3939
this.corrections = new OptimizedScalarQuantizer.QuantizationResult[bulkSize];
4040
}
4141

4242
@Override
43-
void writeVectors(QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter) throws IOException {
43+
public void writeVectors(QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter) throws IOException {
4444
int limit = qvv.count() - bulkSize + 1;
4545
int i = 0;
4646
for (; i < limit; i += bulkSize) {
@@ -93,16 +93,16 @@ private void writeCorrection(OptimizedScalarQuantizer.QuantizationResult correct
9393
}
9494
}
9595

96-
static class SevenBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
96+
public static class SevenBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
9797
private final OptimizedScalarQuantizer.QuantizationResult[] corrections;
9898

99-
SevenBitDiskBBQBulkWriter(int bulkSize, IndexOutput out) {
99+
public SevenBitDiskBBQBulkWriter(int bulkSize, IndexOutput out) {
100100
super(bulkSize, out);
101101
this.corrections = new OptimizedScalarQuantizer.QuantizationResult[bulkSize];
102102
}
103103

104104
@Override
105-
void writeVectors(QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter) throws IOException {
105+
public void writeVectors(QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter) throws IOException {
106106
int limit = qvv.count() - bulkSize + 1;
107107
int i = 0;
108108
for (; i < limit; i += bulkSize) {

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DocIdsWriter.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
*
3131
* <p>It is copied from the BKD implementation.
3232
*/
33-
final class DocIdsWriter {
33+
public final class DocIdsWriter {
3434

3535
private static final byte CONTINUOUS_IDS = (byte) -2;
3636
private static final byte DELTA_BPV_16 = (byte) 16;
@@ -40,7 +40,7 @@ final class DocIdsWriter {
4040

4141
private int[] scratch = new int[0];
4242

43-
DocIdsWriter() {}
43+
public DocIdsWriter() {}
4444

4545
/**
4646
* Calculate the best encoding that will be used to write blocks of doc ids of blockSize.
@@ -51,7 +51,7 @@ final class DocIdsWriter {
5151
* @param blockSize the block size
5252
* @return the byte encoding to use for the blocks
5353
*/
54-
byte calculateBlockEncoding(IntToIntFunction docIds, int count, int blockSize) {
54+
public byte calculateBlockEncoding(IntToIntFunction docIds, int count, int blockSize) {
5555
if (count == 0) {
5656
return CONTINUOUS_IDS;
5757
}
@@ -90,7 +90,7 @@ byte calculateBlockEncoding(IntToIntFunction docIds, int count, int blockSize) {
9090
}
9191
}
9292

93-
void writeDocIds(IntToIntFunction docIds, int count, byte encoding, DataOutput out) throws IOException {
93+
public void writeDocIds(IntToIntFunction docIds, int count, byte encoding, DataOutput out) throws IOException {
9494
if (count == 0) {
9595
return;
9696
}
@@ -206,7 +206,7 @@ private static int[] sortedAndMaxAndMin2Max(IntToIntFunction docIds, int count)
206206
return new int[] { (strictlySorted && min2max == count) ? 1 : 0, max, min2max };
207207
}
208208

209-
void writeDocIds(IntToIntFunction docIds, int count, DataOutput out) throws IOException {
209+
public void writeDocIds(IntToIntFunction docIds, int count, DataOutput out) throws IOException {
210210
if (count == 0) {
211211
return;
212212
}
@@ -253,7 +253,7 @@ void writeDocIds(IntToIntFunction docIds, int count, DataOutput out) throws IOEx
253253
}
254254
}
255255

256-
void readInts(IndexInput in, int count, byte encoding, int[] docIDs) throws IOException {
256+
public void readInts(IndexInput in, int count, byte encoding, int[] docIDs) throws IOException {
257257
if (count == 0) {
258258
return;
259259
}
@@ -271,7 +271,7 @@ void readInts(IndexInput in, int count, byte encoding, int[] docIDs) throws IOEx
271271
}
272272

273273
/** Read {@code count} integers into {@code docIDs}. */
274-
void readInts(IndexInput in, int count, int[] docIDs) throws IOException {
274+
public void readInts(IndexInput in, int count, int[] docIDs) throws IOException {
275275
if (count == 0) {
276276
return;
277277
}

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ public ES920DiskBBQVectorsReader(SegmentReadState state, Map<String, FlatVectors
4343
super(state, rawVectorsReader);
4444
}
4545

46-
CentroidIterator getPostingListPrefetchIterator(CentroidIterator centroidIterator, IndexInput postingListSlice) throws IOException {
46+
public CentroidIterator getPostingListPrefetchIterator(CentroidIterator centroidIterator, IndexInput postingListSlice)
47+
throws IOException {
4748
return new CentroidIterator() {
4849
CentroidOffsetAndLength nextOffsetAndLength = centroidIterator.hasNext()
4950
? centroidIterator.nextPostingListOffsetAndLength()
@@ -80,7 +81,7 @@ public CentroidOffsetAndLength nextPostingListOffsetAndLength() throws IOExcepti
8081
}
8182

8283
@Override
83-
CentroidIterator getCentroidIterator(
84+
public CentroidIterator getCentroidIterator(
8485
FieldInfo fieldInfo,
8586
int numCentroids,
8687
IndexInput centroids,
@@ -348,7 +349,8 @@ private static void score(
348349
}
349350

350351
@Override
351-
PostingVisitor getPostingVisitor(FieldInfo fieldInfo, IndexInput indexInput, float[] target, Bits acceptDocs) throws IOException {
352+
public PostingVisitor getPostingVisitor(FieldInfo fieldInfo, IndexInput indexInput, float[] target, Bits acceptDocs)
353+
throws IOException {
352354
FieldEntry entry = fields.get(fieldInfo.number);
353355
final int maxPostingListSize = indexInput.readVInt();
354356
return new MemorySegmentPostingsVisitor(target, indexInput, entry, fieldInfo, maxPostingListSize, acceptDocs);

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsWriter.java

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public ES920DiskBBQVectorsWriter(
6464
}
6565

6666
@Override
67-
CentroidOffsetAndLength buildAndWritePostingsLists(
67+
public CentroidOffsetAndLength buildAndWritePostingsLists(
6868
FieldInfo fieldInfo,
6969
CentroidSupplier centroidSupplier,
7070
FloatVectorValues floatVectorValues,
@@ -160,7 +160,7 @@ CentroidOffsetAndLength buildAndWritePostingsLists(
160160

161161
@Override
162162
@SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)")
163-
CentroidOffsetAndLength buildAndWritePostingsLists(
163+
public CentroidOffsetAndLength buildAndWritePostingsLists(
164164
FieldInfo fieldInfo,
165165
CentroidSupplier centroidSupplier,
166166
FloatVectorValues floatVectorValues,
@@ -347,12 +347,17 @@ private static void printClusterQualityStatistics(int[][] clusters) {
347347
}
348348

349349
@Override
350-
CentroidSupplier createCentroidSupplier(IndexInput centroidsInput, int numCentroids, FieldInfo fieldInfo, float[] globalCentroid) {
350+
public CentroidSupplier createCentroidSupplier(
351+
IndexInput centroidsInput,
352+
int numCentroids,
353+
FieldInfo fieldInfo,
354+
float[] globalCentroid
355+
) {
351356
return new OffHeapCentroidSupplier(centroidsInput, numCentroids, fieldInfo);
352357
}
353358

354359
@Override
355-
void writeCentroids(
360+
public void writeCentroids(
356361
FieldInfo fieldInfo,
357362
CentroidSupplier centroidSupplier,
358363
float[] globalCentroid,
@@ -502,7 +507,7 @@ public int size() {
502507
* @throws IOException if an I/O error occurs
503508
*/
504509
@Override
505-
CentroidAssignments calculateCentroids(FieldInfo fieldInfo, FloatVectorValues floatVectorValues, float[] globalCentroid)
510+
public CentroidAssignments calculateCentroids(FieldInfo fieldInfo, FloatVectorValues floatVectorValues, float[] globalCentroid)
506511
throws IOException {
507512

508513
long nanoTime = System.nanoTime();

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ protected IVFVectorsReader(SegmentReadState state, Map<String, FlatVectorsReader
106106
}
107107
}
108108

109-
abstract CentroidIterator getCentroidIterator(
109+
public abstract CentroidIterator getCentroidIterator(
110110
FieldInfo fieldInfo,
111111
int numCentroids,
112112
IndexInput centroids,
@@ -388,18 +388,18 @@ IndexInput postingListSlice(IndexInput postingListFile) throws IOException {
388388
}
389389
}
390390

391-
abstract PostingVisitor getPostingVisitor(FieldInfo fieldInfo, IndexInput postingsLists, float[] target, Bits needsScoring)
391+
public abstract PostingVisitor getPostingVisitor(FieldInfo fieldInfo, IndexInput postingsLists, float[] target, Bits needsScoring)
392392
throws IOException;
393393

394-
record CentroidOffsetAndLength(long offset, long length) {}
394+
public record CentroidOffsetAndLength(long offset, long length) {}
395395

396-
interface CentroidIterator {
396+
public interface CentroidIterator {
397397
boolean hasNext();
398398

399399
CentroidOffsetAndLength nextPostingListOffsetAndLength() throws IOException;
400400
}
401401

402-
interface PostingVisitor {
402+
public interface PostingVisitor {
403403
/** returns the number of documents in the posting list */
404404
int resetPostingsScorer(long offset) throws IOException;
405405

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsWriter.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,20 +127,20 @@ public final KnnFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOExc
127127
return rawVectorDelegate;
128128
}
129129

130-
abstract CentroidAssignments calculateCentroids(FieldInfo fieldInfo, FloatVectorValues floatVectorValues, float[] globalCentroid)
130+
public abstract CentroidAssignments calculateCentroids(FieldInfo fieldInfo, FloatVectorValues floatVectorValues, float[] globalCentroid)
131131
throws IOException;
132132

133-
record CentroidOffsetAndLength(LongValues offsets, LongValues lengths) {}
133+
public record CentroidOffsetAndLength(LongValues offsets, LongValues lengths) {}
134134

135-
abstract void writeCentroids(
135+
public abstract void writeCentroids(
136136
FieldInfo fieldInfo,
137137
CentroidSupplier centroidSupplier,
138138
float[] globalCentroid,
139139
CentroidOffsetAndLength centroidOffsetAndLength,
140140
IndexOutput centroidOutput
141141
) throws IOException;
142142

143-
abstract CentroidOffsetAndLength buildAndWritePostingsLists(
143+
public abstract CentroidOffsetAndLength buildAndWritePostingsLists(
144144
FieldInfo fieldInfo,
145145
CentroidSupplier centroidSupplier,
146146
FloatVectorValues floatVectorValues,
@@ -150,7 +150,7 @@ abstract CentroidOffsetAndLength buildAndWritePostingsLists(
150150
int[] overspillAssignments
151151
) throws IOException;
152152

153-
abstract CentroidOffsetAndLength buildAndWritePostingsLists(
153+
public abstract CentroidOffsetAndLength buildAndWritePostingsLists(
154154
FieldInfo fieldInfo,
155155
CentroidSupplier centroidSupplier,
156156
FloatVectorValues floatVectorValues,
@@ -161,7 +161,7 @@ abstract CentroidOffsetAndLength buildAndWritePostingsLists(
161161
int[] overspillAssignments
162162
) throws IOException;
163163

164-
abstract CentroidSupplier createCentroidSupplier(
164+
public abstract CentroidSupplier createCentroidSupplier(
165165
IndexInput centroidsInput,
166166
int numCentroids,
167167
FieldInfo fieldInfo,

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IntSorter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@
1212
import org.apache.lucene.util.IntroSorter;
1313
import org.apache.lucene.util.hnsw.IntToIntFunction;
1414

15-
class IntSorter extends IntroSorter {
15+
public class IntSorter extends IntroSorter {
1616
int pivot = -1;
1717
private final int[] arr;
1818
private final IntToIntFunction func;
1919

20-
IntSorter(int[] arr, IntToIntFunction func) {
20+
public IntSorter(int[] arr, IntToIntFunction func) {
2121
this.arr = arr;
2222
this.func = func;
2323
}

0 commit comments

Comments
 (0)