Skip to content

Commit 5eaea78

Browse files
authored
Refactor/disk bbq class movements (#135367)
This is a minor refactor. But it makes some the interfaces cleaner
1 parent ed97c40 commit 5eaea78

File tree

8 files changed

+146
-92
lines changed

8 files changed

+146
-92
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.vectors.diskbbq;
11+
12+
import java.io.IOException;
13+
14+
/**
15+
* An interface for that supply centroids.
16+
*/
17+
interface CentroidSupplier {
18+
CentroidSupplier EMPTY = new CentroidSupplier() {
19+
@Override
20+
public int size() {
21+
return 0;
22+
}
23+
24+
@Override
25+
public float[] centroid(int centroidOrdinal) {
26+
throw new IllegalStateException("No centroids");
27+
}
28+
};
29+
30+
int size();
31+
32+
float[] centroid(int centroidOrdinal) throws IOException;
33+
34+
static CentroidSupplier fromArray(float[][] centroids) {
35+
if (centroids.length == 0) {
36+
return EMPTY;
37+
}
38+
return new CentroidSupplier() {
39+
@Override
40+
public int size() {
41+
return centroids.length;
42+
}
43+
44+
@Override
45+
public float[] centroid(int centroidOrdinal) {
46+
return centroids[centroidOrdinal];
47+
}
48+
};
49+
}
50+
}

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/DiskBBQBulkWriter.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@ protected DiskBBQBulkWriter(int bulkSize, IndexOutput out) {
2929
this.out = out;
3030
}
3131

32-
abstract void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
33-
throws IOException;
32+
abstract void writeVectors(QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter) throws IOException;
3433

3534
static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
3635
private final OptimizedScalarQuantizer.QuantizationResult[] corrections;
@@ -41,8 +40,7 @@ static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
4140
}
4241

4342
@Override
44-
void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
45-
throws IOException {
43+
void writeVectors(QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter) throws IOException {
4644
int limit = qvv.count() - bulkSize + 1;
4745
int i = 0;
4846
for (; i < limit; i += bulkSize) {
@@ -104,8 +102,7 @@ static class SevenBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
104102
}
105103

106104
@Override
107-
void writeVectors(ES920DiskBBQVectorsWriter.QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter)
108-
throws IOException {
105+
void writeVectors(QuantizedVectorValues qvv, CheckedIntConsumer<IOException> docsWriter) throws IOException {
109106
int limit = qvv.count() - bulkSize + 1;
110107
int i = 0;
111108
for (; i < limit; i += bulkSize) {

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsWriter.java

Lines changed: 1 addition & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import org.apache.lucene.store.IOContext;
1818
import org.apache.lucene.store.IndexInput;
1919
import org.apache.lucene.store.IndexOutput;
20-
import org.apache.lucene.util.IntroSorter;
2120
import org.apache.lucene.util.VectorUtil;
2221
import org.apache.lucene.util.hnsw.IntToIntFunction;
2322
import org.apache.lucene.util.packed.PackedInts;
@@ -384,7 +383,7 @@ private void writeCentroidsWithParents(
384383
centroidOutput.writeVInt(centroidGroups.centroids.length);
385384
centroidOutput.writeVInt(centroidGroups.maxVectorsPerCentroidLength);
386385
QuantizedCentroids parentQuantizeCentroid = new QuantizedCentroids(
387-
new OnHeapCentroidSupplier(centroidGroups.centroids),
386+
CentroidSupplier.fromArray(centroidGroups.centroids),
388387
fieldInfo.getVectorDimension(),
389388
osq,
390389
globalCentroid
@@ -577,18 +576,6 @@ public float[] centroid(int centroidOrdinal) throws IOException {
577576
}
578577
}
579578

580-
interface QuantizedVectorValues {
581-
int count();
582-
583-
byte[] next() throws IOException;
584-
585-
OptimizedScalarQuantizer.QuantizationResult getCorrections() throws IOException;
586-
}
587-
588-
interface IntToBooleanFunction {
589-
boolean apply(int ord);
590-
}
591-
592579
static class QuantizedCentroids implements QuantizedVectorValues {
593580
private final CentroidSupplier supplier;
594581
private final OptimizedScalarQuantizer quantizer;
@@ -760,37 +747,4 @@ public void readQuantizedVector(int ord, boolean isOverspill) throws IOException
760747
bitSum = quantizedVectorsInput.readShort();
761748
}
762749
}
763-
764-
private static class IntSorter extends IntroSorter {
765-
int pivot = -1;
766-
private final int[] arr;
767-
private final IntToIntFunction func;
768-
769-
private IntSorter(int[] arr, IntToIntFunction func) {
770-
this.arr = arr;
771-
this.func = func;
772-
}
773-
774-
@Override
775-
protected void setPivot(int i) {
776-
pivot = func.apply(arr[i]);
777-
}
778-
779-
@Override
780-
protected int comparePivot(int j) {
781-
return Integer.compare(pivot, func.apply(arr[j]));
782-
}
783-
784-
@Override
785-
protected int compare(int a, int b) {
786-
return Integer.compare(func.apply(arr[a]), func.apply(arr[b]));
787-
}
788-
789-
@Override
790-
protected void swap(int i, int j) {
791-
final int tmp = arr[i];
792-
arr[i] = arr[j];
793-
arr[j] = tmp;
794-
}
795-
}
796750
}

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -400,8 +400,6 @@ interface CentroidIterator {
400400
}
401401

402402
interface PostingVisitor {
403-
// TODO maybe we can not specifically pass the centroid...
404-
405403
/** returns the number of documents in the posting list */
406404
int resetPostingsScorer(long offset) throws IOException;
407405

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsWriter.java

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ public final void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
183183
// build centroids
184184
final CentroidAssignments centroidAssignments = calculateCentroids(fieldWriter.fieldInfo, floatVectorValues, globalCentroid);
185185
// wrap centroids with a supplier
186-
final CentroidSupplier centroidSupplier = new OnHeapCentroidSupplier(centroidAssignments.centroids());
186+
final CentroidSupplier centroidSupplier = CentroidSupplier.fromArray(centroidAssignments.centroids());
187187
// write posting lists
188188
final long postingListOffset = ivfClusters.alignFilePointer(Float.BYTES);
189189
final CentroidOffsetAndLength centroidOffsetAndLength = buildAndWritePostingsLists(
@@ -549,40 +549,4 @@ public final long ramBytesUsed() {
549549

550550
private record FieldWriter(FieldInfo fieldInfo, FlatFieldVectorsWriter<float[]> delegate) {}
551551

552-
interface CentroidSupplier {
553-
CentroidSupplier EMPTY = new CentroidSupplier() {
554-
@Override
555-
public int size() {
556-
return 0;
557-
}
558-
559-
@Override
560-
public float[] centroid(int centroidOrdinal) {
561-
throw new IllegalStateException("No centroids");
562-
}
563-
};
564-
565-
int size();
566-
567-
float[] centroid(int centroidOrdinal) throws IOException;
568-
}
569-
570-
// TODO throw away rawCentroids
571-
static class OnHeapCentroidSupplier implements CentroidSupplier {
572-
private final float[][] centroids;
573-
574-
OnHeapCentroidSupplier(float[][] centroids) {
575-
this.centroids = centroids;
576-
}
577-
578-
@Override
579-
public int size() {
580-
return centroids.length;
581-
}
582-
583-
@Override
584-
public float[] centroid(int centroidOrdinal) throws IOException {
585-
return centroids[centroidOrdinal];
586-
}
587-
}
588552
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.vectors.diskbbq;
11+
12+
import org.apache.lucene.util.IntroSorter;
13+
import org.apache.lucene.util.hnsw.IntToIntFunction;
14+
15+
class IntSorter extends IntroSorter {
16+
int pivot = -1;
17+
private final int[] arr;
18+
private final IntToIntFunction func;
19+
20+
IntSorter(int[] arr, IntToIntFunction func) {
21+
this.arr = arr;
22+
this.func = func;
23+
}
24+
25+
@Override
26+
protected void setPivot(int i) {
27+
pivot = func.apply(arr[i]);
28+
}
29+
30+
@Override
31+
protected int comparePivot(int j) {
32+
return Integer.compare(pivot, func.apply(arr[j]));
33+
}
34+
35+
@Override
36+
protected int compare(int a, int b) {
37+
return Integer.compare(func.apply(arr[a]), func.apply(arr[b]));
38+
}
39+
40+
@Override
41+
protected void swap(int i, int j) {
42+
final int tmp = arr[i];
43+
arr[i] = arr[j];
44+
arr[j] = tmp;
45+
}
46+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.vectors.diskbbq;
11+
12+
/**
13+
* Functional interface representing a function that takes an integer input
14+
* and produces a boolean output.
15+
*/
16+
interface IntToBooleanFunction {
17+
boolean apply(int value);
18+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.vectors.diskbbq;
11+
12+
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
13+
14+
import java.io.IOException;
15+
16+
/**
17+
* Interface representing a collection of quantized vector values.
18+
* Provides methods to iterate through the vectors and retrieve
19+
* associated quantization correction data.
20+
*/
21+
interface QuantizedVectorValues {
22+
int count();
23+
24+
byte[] next() throws IOException;
25+
26+
OptimizedScalarQuantizer.QuantizationResult getCorrections() throws IOException;
27+
}

0 commit comments

Comments
 (0)