Skip to content

Commit 5ef4cf1

Browse files
committed
PR comments
1 parent 2f8e975 commit 5ef4cf1

File tree

12 files changed

+38
-353
lines changed

12 files changed

+38
-353
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ record CmdLineArgs(
5151
float filterSelectivity,
5252
long seed,
5353
VectorSimilarityFunction vectorSpace,
54-
int rawVectorSize,
5554
int quantizeBits,
5655
VectorEncoding vectorEncoding,
5756
int dimensions,
@@ -81,7 +80,6 @@ record CmdLineArgs(
8180
static final ParseField FORCE_MERGE_FIELD = new ParseField("force_merge");
8281
static final ParseField VECTOR_SPACE_FIELD = new ParseField("vector_space");
8382
static final ParseField QUANTIZE_BITS_FIELD = new ParseField("quantize_bits");
84-
static final ParseField RAW_VECTOR_SIZE_FIELD = new ParseField("raw_vector_size");
8583
static final ParseField VECTOR_ENCODING_FIELD = new ParseField("vector_encoding");
8684
static final ParseField DIMENSIONS_FIELD = new ParseField("dimensions");
8785
static final ParseField EARLY_TERMINATION_FIELD = new ParseField("early_termination");
@@ -125,7 +123,6 @@ static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
125123
PARSER.declareBoolean(Builder::setReindex, REINDEX_FIELD);
126124
PARSER.declareBoolean(Builder::setForceMerge, FORCE_MERGE_FIELD);
127125
PARSER.declareString(Builder::setVectorSpace, VECTOR_SPACE_FIELD);
128-
PARSER.declareInt(Builder::setRawVectorSize, RAW_VECTOR_SIZE_FIELD);
129126
PARSER.declareInt(Builder::setQuantizeBits, QUANTIZE_BITS_FIELD);
130127
PARSER.declareString(Builder::setVectorEncoding, VECTOR_ENCODING_FIELD);
131128
PARSER.declareInt(Builder::setDimensions, DIMENSIONS_FIELD);
@@ -164,7 +161,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
164161
builder.field(REINDEX_FIELD.getPreferredName(), reindex);
165162
builder.field(FORCE_MERGE_FIELD.getPreferredName(), forceMerge);
166163
builder.field(VECTOR_SPACE_FIELD.getPreferredName(), vectorSpace.name().toLowerCase(Locale.ROOT));
167-
builder.field(RAW_VECTOR_SIZE_FIELD.getPreferredName(), rawVectorSize);
168164
builder.field(QUANTIZE_BITS_FIELD.getPreferredName(), quantizeBits);
169165
builder.field(VECTOR_ENCODING_FIELD.getPreferredName(), vectorEncoding.name().toLowerCase(Locale.ROOT));
170166
builder.field(DIMENSIONS_FIELD.getPreferredName(), dimensions);
@@ -200,7 +196,6 @@ static class Builder {
200196
private boolean reindex = false;
201197
private boolean forceMerge = false;
202198
private VectorSimilarityFunction vectorSpace = VectorSimilarityFunction.EUCLIDEAN;
203-
private int rawVectorSize = 32;
204199
private int quantizeBits = 8;
205200
private VectorEncoding vectorEncoding = VectorEncoding.FLOAT32;
206201
private int dimensions;
@@ -310,11 +305,6 @@ public Builder setVectorSpace(String vectorSpace) {
310305
return this;
311306
}
312307

313-
public Builder setRawVectorSize(int rawVectorSize) {
314-
this.rawVectorSize = rawVectorSize;
315-
return this;
316-
}
317-
318308
public Builder setQuantizeBits(int quantizeBits) {
319309
this.quantizeBits = quantizeBits;
320310
return this;
@@ -390,7 +380,6 @@ public CmdLineArgs build() {
390380
filterSelectivity,
391381
seed,
392382
vectorSpace,
393-
rawVectorSize,
394383
quantizeBits,
395384
vectorEncoding,
396385
dimensions,

server/src/main/java/org/elasticsearch/index/codec/vectors/BFloat16.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import java.nio.ByteOrder;
1515
import java.nio.ShortBuffer;
1616

17-
public class BFloat16 {
17+
public final class BFloat16 {
1818

1919
public static final int BYTES = Short.BYTES;
2020

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormat.java

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import org.elasticsearch.index.codec.vectors.DirectIOCapableFlatVectorsFormat;
1919
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
2020
import org.elasticsearch.index.codec.vectors.es93.DirectIOCapableLucene99FlatVectorsFormat;
21-
import org.elasticsearch.index.codec.vectors.es93.ES93BFloat16FlatVectorsFormat;
2221

2322
import java.io.IOException;
2423
import java.util.Map;
@@ -62,14 +61,9 @@ public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat {
6261
private static final DirectIOCapableFlatVectorsFormat float32VectorFormat = new DirectIOCapableLucene99FlatVectorsFormat(
6362
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
6463
);
65-
private static final DirectIOCapableFlatVectorsFormat bfloat16VectorFormat = new ES93BFloat16FlatVectorsFormat(
66-
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
67-
);
6864
private static final Map<String, DirectIOCapableFlatVectorsFormat> supportedFormats = Map.of(
6965
float32VectorFormat.getName(),
70-
float32VectorFormat,
71-
bfloat16VectorFormat.getName(),
72-
bfloat16VectorFormat
66+
float32VectorFormat
7367
);
7468

7569
// This dynamically sets the cluster probe based on the `k` requested and the number of clusters.
@@ -88,10 +82,10 @@ public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat {
8882
private final DirectIOCapableFlatVectorsFormat rawVectorFormat;
8983

9084
public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) {
91-
this(vectorPerCluster, centroidsPerParentCluster, false, false);
85+
this(vectorPerCluster, centroidsPerParentCluster, false);
9286
}
9387

94-
public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster, boolean useDirectIO, boolean useBFloat16) {
88+
public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster, boolean useDirectIO) {
9589
super(NAME);
9690
if (vectorPerCluster < MIN_VECTORS_PER_CLUSTER || vectorPerCluster > MAX_VECTORS_PER_CLUSTER) {
9791
throw new IllegalArgumentException(
@@ -116,7 +110,7 @@ public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentClu
116110
this.vectorPerCluster = vectorPerCluster;
117111
this.centroidsPerParentCluster = centroidsPerParentCluster;
118112
this.useDirectIO = useDirectIO;
119-
this.rawVectorFormat = useBFloat16 ? bfloat16VectorFormat : float32VectorFormat;
113+
this.rawVectorFormat = float32VectorFormat;
120114
}
121115

122116
/** Constructs a format using the given graph construction parameters and scalar quantization. */

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93BinaryQuantizedVectorsFormat.java

Lines changed: 2 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,12 @@
2525
import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
2626
import org.apache.lucene.index.SegmentReadState;
2727
import org.apache.lucene.index.SegmentWriteState;
28-
import org.elasticsearch.index.codec.vectors.DirectIOCapableFlatVectorsFormat;
2928
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
3029
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryFlatVectorsScorer;
3130
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsReader;
3231
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsWriter;
3332

3433
import java.io.IOException;
35-
import java.util.Map;
3634

3735
/**
3836
* Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10
@@ -91,57 +89,23 @@ public class ES93BinaryQuantizedVectorsFormat extends ES93GenericFlatVectorsForm
9189

9290
public static final String NAME = "ES93BinaryQuantizedVectorsFormat";
9391

94-
private static final DirectIOCapableFlatVectorsFormat float32VectorFormat = new DirectIOCapableLucene99FlatVectorsFormat(
95-
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
96-
);
97-
private static final DirectIOCapableFlatVectorsFormat bfloat16VectorFormat = new ES93BFloat16FlatVectorsFormat(
98-
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
99-
);
100-
101-
private static final Map<String, DirectIOCapableFlatVectorsFormat> supportedFormats = Map.of(
102-
float32VectorFormat.getName(),
103-
float32VectorFormat,
104-
bfloat16VectorFormat.getName(),
105-
bfloat16VectorFormat
106-
);
107-
10892
private static final ES818BinaryFlatVectorsScorer scorer = new ES818BinaryFlatVectorsScorer(
10993
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
11094
);
11195

112-
private final boolean useDirectIO;
113-
private final DirectIOCapableFlatVectorsFormat rawFormat;
114-
11596
public ES93BinaryQuantizedVectorsFormat() {
11697
this(false, false);
11798
}
11899

119-
public ES93BinaryQuantizedVectorsFormat(boolean useDirectIO, boolean useBFloat16) {
120-
super(NAME);
121-
this.useDirectIO = useDirectIO;
122-
this.rawFormat = useBFloat16 ? bfloat16VectorFormat : float32VectorFormat;
100+
public ES93BinaryQuantizedVectorsFormat(boolean useBFloat16, boolean useDirectIO) {
101+
super(NAME, useBFloat16, useDirectIO);
123102
}
124103

125104
@Override
126105
protected FlatVectorsScorer flatVectorsScorer() {
127106
return scorer;
128107
}
129108

130-
@Override
131-
protected boolean useDirectIOReads() {
132-
return useDirectIO;
133-
}
134-
135-
@Override
136-
protected DirectIOCapableFlatVectorsFormat writeFlatVectorsFormat() {
137-
return rawFormat;
138-
}
139-
140-
@Override
141-
protected Map<String, DirectIOCapableFlatVectorsFormat> supportedReadFlatVectorsFormats() {
142-
return supportedFormats;
143-
}
144-
145109
@Override
146110
public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
147111
return new ES818BinaryQuantizedVectorsWriter(scorer, super.fieldsWriter(state), state);

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93GenericFlatVectorsFormat.java

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.elasticsearch.index.codec.vectors.es93;
1111

12+
import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
1213
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
1314
import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
1415
import org.apache.lucene.index.SegmentReadState;
@@ -34,42 +35,45 @@ public abstract class ES93GenericFlatVectorsFormat extends AbstractFlatVectorsFo
3435
VERSION_CURRENT
3536
);
3637

37-
public ES93GenericFlatVectorsFormat(String name) {
38-
super(name);
39-
}
38+
private static final DirectIOCapableFlatVectorsFormat float32VectorFormat = new DirectIOCapableLucene99FlatVectorsFormat(
39+
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
40+
);
41+
private static final DirectIOCapableFlatVectorsFormat bfloat16VectorFormat = new ES93BFloat16FlatVectorsFormat(
42+
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
43+
);
4044

41-
protected abstract DirectIOCapableFlatVectorsFormat writeFlatVectorsFormat();
45+
private static final Map<String, DirectIOCapableFlatVectorsFormat> supportedFormats = Map.of(
46+
float32VectorFormat.getName(),
47+
float32VectorFormat,
48+
bfloat16VectorFormat.getName(),
49+
bfloat16VectorFormat
50+
);
4251

43-
protected abstract boolean useDirectIOReads();
52+
private final DirectIOCapableFlatVectorsFormat writeFormat;
53+
private final boolean useDirectIO;
4454

45-
protected abstract Map<String, DirectIOCapableFlatVectorsFormat> supportedReadFlatVectorsFormats();
55+
public ES93GenericFlatVectorsFormat(String name, boolean useBFloat16, boolean useDirectIO) {
56+
super(name);
57+
writeFormat = useBFloat16 ? bfloat16VectorFormat : float32VectorFormat;
58+
this.useDirectIO = useDirectIO;
59+
}
4660

4761
@Override
4862
public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
49-
var flatFormat = writeFlatVectorsFormat();
50-
boolean directIO = useDirectIOReads();
51-
return new ES93GenericFlatVectorsWriter(META, flatFormat.getName(), directIO, state, flatFormat.fieldsWriter(state));
63+
return new ES93GenericFlatVectorsWriter(META, writeFormat.getName(), useDirectIO, state, writeFormat.fieldsWriter(state));
5264
}
5365

5466
@Override
5567
public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException {
56-
var readFormats = supportedReadFlatVectorsFormats();
5768
return new ES93GenericFlatVectorsReader(META, state, (f, dio) -> {
58-
var format = readFormats.get(f);
69+
var format = supportedFormats.get(f);
5970
if (format == null) return null;
6071
return format.fieldsReader(state, dio);
6172
});
6273
}
6374

6475
@Override
6576
public String toString() {
66-
return getName()
67-
+ "(name="
68-
+ getName()
69-
+ ", writeFlatVectorFormat="
70-
+ writeFlatVectorsFormat()
71-
+ ", readFlatVectorsFormats="
72-
+ supportedReadFlatVectorsFormats().values()
73-
+ ")";
77+
return getName() + "(name=" + getName() + ", writeFlatVectorFormat=" + writeFormat + ")";
7478
}
7579
}

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswBinaryQuantizedVectorsFormat.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public ES93HnswBinaryQuantizedVectorsFormat() {
5353
*/
5454
public ES93HnswBinaryQuantizedVectorsFormat(int maxConn, int beamWidth, boolean useDirectIO, boolean useBFloat16) {
5555
super(NAME, maxConn, beamWidth);
56-
flatVectorsFormat = new ES93BinaryQuantizedVectorsFormat(useDirectIO, useBFloat16);
56+
flatVectorsFormat = new ES93BinaryQuantizedVectorsFormat(useBFloat16, useDirectIO);
5757
}
5858

5959
/**
@@ -76,7 +76,7 @@ public ES93HnswBinaryQuantizedVectorsFormat(
7676
ExecutorService mergeExec
7777
) {
7878
super(NAME, maxConn, beamWidth, numMergeWorkers, mergeExec);
79-
flatVectorsFormat = new ES93BinaryQuantizedVectorsFormat(useDirectIO, useBFloat16);
79+
flatVectorsFormat = new ES93BinaryQuantizedVectorsFormat(useBFloat16, useDirectIO);
8080
}
8181

8282
@Override

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/OffHeapBFloat16VectorValues.java

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
2323
import org.apache.lucene.codecs.lucene90.IndexedDISI;
24-
import org.apache.lucene.codecs.lucene95.HasIndexSlice;
2524
import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration;
2625
import org.apache.lucene.index.FloatVectorValues;
2726
import org.apache.lucene.index.VectorEncoding;
@@ -36,7 +35,7 @@
3635

3736
import java.io.IOException;
3837

39-
abstract class OffHeapBFloat16VectorValues extends FloatVectorValues implements HasIndexSlice {
38+
abstract class OffHeapBFloat16VectorValues extends FloatVectorValues {
4039

4140
protected final int dimension;
4241
protected final int size;
@@ -62,7 +61,7 @@ abstract class OffHeapBFloat16VectorValues extends FloatVectorValues implements
6261
this.byteSize = byteSize;
6362
this.similarityFunction = similarityFunction;
6463
this.flatVectorsScorer = flatVectorsScorer;
65-
bfloatBytes = new byte[dimension * 2];
64+
bfloatBytes = new byte[dimension * BFloat16.BYTES];
6665
value = new float[dimension];
6766
}
6867

@@ -76,11 +75,6 @@ public int size() {
7675
return size;
7776
}
7877

79-
@Override
80-
public IndexInput getSlice() {
81-
return slice;
82-
}
83-
8478
@Override
8579
public float[] vectorValue(int targetOrd) throws IOException {
8680
if (lastOrd == targetOrd) {

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2166,8 +2166,7 @@ KnnVectorsFormat getVectorsFormat(ElementType elementType) {
21662166
return new ES920DiskBBQVectorsFormat(
21672167
clusterSize,
21682168
ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER,
2169-
onDiskRescore,
2170-
false
2169+
onDiskRescore
21712170
);
21722171
}
21732172

0 commit comments

Comments
 (0)