Skip to content

Commit 6e27ccb

Browse files
committed
Merge branch 'bfloat16-vector-format' into bbq_hnsw-bfloat16
2 parents 207475c + 5e58904 commit 6e27ccb

File tree

33 files changed

+3681
-50
lines changed

33 files changed

+3681
-50
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ record CmdLineArgs(
4949
float filterSelectivity,
5050
long seed,
5151
VectorSimilarityFunction vectorSpace,
52+
int rawVectorSize,
5253
int quantizeBits,
5354
VectorEncoding vectorEncoding,
5455
int dimensions,
@@ -76,6 +77,7 @@ record CmdLineArgs(
7677
static final ParseField FORCE_MERGE_FIELD = new ParseField("force_merge");
7778
static final ParseField VECTOR_SPACE_FIELD = new ParseField("vector_space");
7879
static final ParseField QUANTIZE_BITS_FIELD = new ParseField("quantize_bits");
80+
static final ParseField RAW_VECTOR_SIZE_FIELD = new ParseField("raw_vector_size");
7981
static final ParseField VECTOR_ENCODING_FIELD = new ParseField("vector_encoding");
8082
static final ParseField DIMENSIONS_FIELD = new ParseField("dimensions");
8183
static final ParseField EARLY_TERMINATION_FIELD = new ParseField("early_termination");
@@ -110,6 +112,7 @@ static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
110112
PARSER.declareBoolean(Builder::setReindex, REINDEX_FIELD);
111113
PARSER.declareBoolean(Builder::setForceMerge, FORCE_MERGE_FIELD);
112114
PARSER.declareString(Builder::setVectorSpace, VECTOR_SPACE_FIELD);
115+
PARSER.declareInt(Builder::setRawVectorSize, RAW_VECTOR_SIZE_FIELD);
113116
PARSER.declareInt(Builder::setQuantizeBits, QUANTIZE_BITS_FIELD);
114117
PARSER.declareString(Builder::setVectorEncoding, VECTOR_ENCODING_FIELD);
115118
PARSER.declareInt(Builder::setDimensions, DIMENSIONS_FIELD);
@@ -146,6 +149,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
146149
builder.field(REINDEX_FIELD.getPreferredName(), reindex);
147150
builder.field(FORCE_MERGE_FIELD.getPreferredName(), forceMerge);
148151
builder.field(VECTOR_SPACE_FIELD.getPreferredName(), vectorSpace.name().toLowerCase(Locale.ROOT));
152+
builder.field(RAW_VECTOR_SIZE_FIELD.getPreferredName(), rawVectorSize);
149153
builder.field(QUANTIZE_BITS_FIELD.getPreferredName(), quantizeBits);
150154
builder.field(VECTOR_ENCODING_FIELD.getPreferredName(), vectorEncoding.name().toLowerCase(Locale.ROOT));
151155
builder.field(DIMENSIONS_FIELD.getPreferredName(), dimensions);
@@ -179,6 +183,7 @@ static class Builder {
179183
private boolean reindex = false;
180184
private boolean forceMerge = false;
181185
private VectorSimilarityFunction vectorSpace = VectorSimilarityFunction.EUCLIDEAN;
186+
private int rawVectorSize = 32;
182187
private int quantizeBits = 8;
183188
private VectorEncoding vectorEncoding = VectorEncoding.FLOAT32;
184189
private int dimensions;
@@ -281,6 +286,11 @@ public Builder setVectorSpace(String vectorSpace) {
281286
return this;
282287
}
283288

289+
public Builder setRawVectorSize(int rawVectorSize) {
290+
this.rawVectorSize = rawVectorSize;
291+
return this;
292+
}
293+
284294
public Builder setQuantizeBits(int quantizeBits) {
285295
this.quantizeBits = quantizeBits;
286296
return this;
@@ -346,6 +356,7 @@ public CmdLineArgs build() {
346356
filterSelectivity,
347357
seed,
348358
vectorSpace,
359+
rawVectorSize,
349360
quantizeBits,
350361
vectorEncoding,
351362
dimensions,

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,8 @@
3333
import org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat;
3434
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat;
3535
import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat;
36+
import org.elasticsearch.index.codec.vectors.es92.ES92BinaryQuantizedBFloat16VectorsFormat;
37+
import org.elasticsearch.index.codec.vectors.es92.ES92HnswBinaryQuantizedBFloat16VectorsFormat;
3638
import org.elasticsearch.logging.Level;
3739
import org.elasticsearch.logging.LogManager;
3840
import org.elasticsearch.logging.Logger;
@@ -125,9 +127,17 @@ static Codec createCodec(CmdLineArgs args) {
125127
} else {
126128
if (args.quantizeBits() == 1) {
127129
if (args.indexType() == IndexType.FLAT) {
128-
format = new ES818BinaryQuantizedVectorsFormat();
130+
if (args.rawVectorSize() == 16) {
131+
format = new ES92BinaryQuantizedBFloat16VectorsFormat();
132+
} else {
133+
format = new ES818BinaryQuantizedVectorsFormat();
134+
}
129135
} else {
130-
format = new ES818HnswBinaryQuantizedVectorsFormat(args.hnswM(), args.hnswEfConstruction(), 1, null);
136+
if (args.rawVectorSize() == 16) {
137+
format = new ES92HnswBinaryQuantizedBFloat16VectorsFormat(args.hnswM(), args.hnswEfConstruction(), 1, null);
138+
} else {
139+
format = new ES818HnswBinaryQuantizedVectorsFormat(args.hnswM(), args.hnswEfConstruction(), 1, null);
140+
}
131141
}
132142
} else if (args.quantizeBits() < 32) {
133143
if (args.indexType() == IndexType.FLAT) {

0 commit comments

Comments
 (0)