Skip to content

Commit c4c2218

Browse files
committed
Add CLI args for KnnIndexTester
1 parent 688ea6d commit c4c2218

File tree

5 files changed

+38
-7
lines changed

5 files changed

+38
-7
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@ record CmdLineArgs(
4747
VectorSimilarityFunction vectorSpace,
4848
int quantizeBits,
4949
VectorEncoding vectorEncoding,
50-
int dimensions
50+
int dimensions,
51+
boolean useNewFlatVectorsFormat,
52+
int quantizeQueryBits
5153
) implements ToXContentObject {
5254

5355
static final ParseField DOC_VECTORS_FIELD = new ParseField("doc_vectors");
@@ -70,6 +72,9 @@ record CmdLineArgs(
7072
static final ParseField QUANTIZE_BITS_FIELD = new ParseField("quantize_bits");
7173
static final ParseField VECTOR_ENCODING_FIELD = new ParseField("vector_encoding");
7274
static final ParseField DIMENSIONS_FIELD = new ParseField("dimensions");
75+
// NOTE(review): QUERY_BITS_FIELD ("query_bits") is not registered with PARSER and is not written by
// toXContent in any of the visible hunks; only QUANTIZE_QUERY_BITS_FIELD ("quantize_query_bits") is.
// This looks like an unused leftover -- confirm whether it should be removed or wired up.
static final ParseField QUERY_BITS_FIELD = new ParseField("query_bits");
76+
static final ParseField USE_NEW_FLAT_VECTORS_FORMAT_FIELD = new ParseField("use_new_flat_vectors_format");
77+
static final ParseField QUANTIZE_QUERY_BITS_FIELD = new ParseField("quantize_query_bits");
7378

7479
static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
7580
Builder builder = PARSER.apply(parser, null);
@@ -99,6 +104,8 @@ static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
99104
PARSER.declareInt(Builder::setQuantizeBits, QUANTIZE_BITS_FIELD);
100105
PARSER.declareString(Builder::setVectorEncoding, VECTOR_ENCODING_FIELD);
101106
PARSER.declareInt(Builder::setDimensions, DIMENSIONS_FIELD);
107+
PARSER.declareBoolean(Builder::setUseNewFlatVectorsFormat, USE_NEW_FLAT_VECTORS_FORMAT_FIELD);
108+
PARSER.declareInt(Builder::setQuantizeQueryBits, QUANTIZE_QUERY_BITS_FIELD);
102109
}
103110

104111
@Override
@@ -128,6 +135,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
128135
builder.field(QUANTIZE_BITS_FIELD.getPreferredName(), quantizeBits);
129136
builder.field(VECTOR_ENCODING_FIELD.getPreferredName(), vectorEncoding.name().toLowerCase(Locale.ROOT));
130137
builder.field(DIMENSIONS_FIELD.getPreferredName(), dimensions);
138+
builder.field(USE_NEW_FLAT_VECTORS_FORMAT_FIELD.getPreferredName(), useNewFlatVectorsFormat);
139+
builder.field(QUANTIZE_QUERY_BITS_FIELD.getPreferredName(), quantizeQueryBits);
131140
return builder.endObject();
132141
}
133142

@@ -157,6 +166,8 @@ static class Builder {
157166
private int quantizeBits = 8;
158167
private VectorEncoding vectorEncoding = VectorEncoding.FLOAT32;
159168
private int dimensions;
169+
private boolean useNewFlatVectorsFormat = false;
170+
private int quantizeQueryBits = 8;
160171

161172
public Builder setDocVectors(String docVectors) {
162173
this.docVectors = PathUtils.get(docVectors);
@@ -258,6 +269,16 @@ public Builder setDimensions(int dimensions) {
258269
return this;
259270
}
260271

272+
/**
 * Stores the "use_new_flat_vectors_format" flag on this builder.
 * <p>
 * The builder field defaults to {@code false}, and {@link #build()} passes the stored value
 * straight into the {@code CmdLineArgs} record. In {@code KnnIndexTester.createCodec} the flag
 * only takes effect when the index type is {@code FLAT}, where it selects
 * {@code ES910BinaryQuantizedVectorsFormat}.
 *
 * @param useNewFlatVectorsFormat the flag value to store
 * @return this builder, to allow call chaining
 */
public Builder setUseNewFlatVectorsFormat(boolean useNewFlatVectorsFormat) {
273+
this.useNewFlatVectorsFormat = useNewFlatVectorsFormat;
274+
return this;
275+
}
276+
277+
/**
 * Stores the "quantize_query_bits" value on this builder.
 * <p>
 * The builder field defaults to {@code 8}, and {@link #build()} passes the stored value
 * straight into the {@code CmdLineArgs} record. No range check is performed here.
 * <p>
 * NOTE(review): {@code KnnIndexTester.createCodec} narrows this value with a {@code (byte)} cast
 * before handing it to {@code ES910BinaryQuantizedVectorsFormat}; presumably only small positive
 * bit counts are meaningful -- confirm the valid range and consider validating it in the builder.
 *
 * @param quantizeQueryBits the value to store (by its name, the bit count used when quantizing
 *                          query vectors -- TODO confirm against the format implementation)
 * @return this builder, to allow call chaining
 */
public Builder setQuantizeQueryBits(int quantizeQueryBits) {
278+
this.quantizeQueryBits = quantizeQueryBits;
279+
return this;
280+
}
281+
261282
public CmdLineArgs build() {
262283
if (docVectors == null) {
263284
throw new IllegalArgumentException("Document vectors path must be provided");
@@ -285,7 +306,9 @@ public CmdLineArgs build() {
285306
vectorSpace,
286307
quantizeBits,
287308
vectorEncoding,
288-
dimensions
309+
dimensions,
310+
useNewFlatVectorsFormat,
311+
quantizeQueryBits
289312
);
290313
}
291314
}

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.elasticsearch.index.codec.vectors.IVFVectorsFormat;
2626
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat;
2727
import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat;
28+
import org.elasticsearch.index.codec.vectors.es910.ES910BinaryQuantizedVectorsFormat;
2829
import org.elasticsearch.logging.Level;
2930
import org.elasticsearch.logging.LogManager;
3031
import org.elasticsearch.logging.Logger;
@@ -91,7 +92,10 @@ static Codec createCodec(CmdLineArgs args) {
9192
if (args.indexType() == IndexType.IVF) {
9293
format = new IVFVectorsFormat(args.ivfClusterSize());
9394
} else {
94-
if (args.quantizeBits() == 1) {
95+
if (args.useNewFlatVectorsFormat() && args.indexType() == IndexType.FLAT) {
96+
logger.warn("Using new flat vectors format for index type FLAT");
97+
format = new ES910BinaryQuantizedVectorsFormat((byte) args.quantizeBits(), (byte) args.quantizeQueryBits());
98+
} else if (args.quantizeBits() == 1) {
9599
if (args.indexType() == IndexType.FLAT) {
96100
format = new ES818BinaryQuantizedVectorsFormat();
97101
} else {
@@ -113,6 +117,7 @@ static Codec createCodec(CmdLineArgs args) {
113117
format = new Lucene99HnswVectorsFormat(args.hnswM(), args.hnswEfConstruction(), 1, null);
114118
}
115119
}
120+
logger.info("Using vector format: " + format);
116121
return new Lucene101Codec() {
117122
@Override
118123
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {

server/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,4 +483,5 @@
483483
exports org.elasticsearch.lucene.search;
484484
exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn;
485485
exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn;
486+
// NOTE(review): unlike the two exports directly above, this one has no "to org.elasticsearch.test.knn"
// qualifier, so the es910 package is exported to every module. Confirm this is intended; if only the
// KNN tester needs it, a qualified export would match the neighbouring directives.
exports org.elasticsearch.index.codec.vectors.es910;
486487
}

server/src/main/java/org/elasticsearch/index/codec/vectors/OptimizedScalarQuantizer.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ public QuantizationResult scalarQuantize(float[] vector, byte[] destination, byt
132132
int assignment = Math.round((xi - a) / step);
133133
sumQuery += assignment;
134134
destination[h] = (byte) assignment;
135+
assert assignment >= 0 && assignment < points
136+
: "assignment: " + assignment + ", a: " + a + ", b: " + b + ", step: " + step + ", xi: " + xi;
135137
}
136138
return new QuantizationResult(
137139
intervalScratch[0],

server/src/main/java/org/elasticsearch/index/codec/vectors/es910/ES910BinaryFlatVectorsScorer.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,9 @@ public RandomVectorScorer getRandomVectorScorer(
7272
if (similarityFunction == COSINE) {
7373
VectorUtil.l2normalize(copy);
7474
}
75-
byte[] quantized = new byte[copy.length];
76-
OptimizedScalarQuantizer.QuantizationResult queryCorrections = quantizer.scalarQuantize(copy, quantized, queryBits, centroid);
75+
target = copy;
76+
byte[] quantized = new byte[target.length];
77+
OptimizedScalarQuantizer.QuantizationResult queryCorrections = quantizer.scalarQuantize(target, quantized, queryBits, centroid);
7778
return new RandomVectorScorer.AbstractRandomVectorScorer(vectorValues) {
7879
@Override
7980
public float score(int i) throws IOException {
@@ -116,7 +117,7 @@ RandomVectorScorerSupplier getRandomVectorScorerSupplier(
116117

117118
@Override
118119
public String toString() {
119-
return "ES910BinaryFlatVectorsScorer(nonQuantizedDelegate=" + nonQuantizedDelegate + ")";
120+
return "ES910BinaryFlatVectorsScorer(nonQuantizedDelegate=" + nonQuantizedDelegate + ", queryBits = " + queryBits + ")";
120121
}
121122

122123
/** Vector scorer supplier over binarized vector values */
@@ -171,7 +172,6 @@ public static class BinarizedRandomVectorScorer extends UpdateableRandomVectorSc
171172
this.targetVectors = targetVectors;
172173
this.similarityFunction = similarityFunction;
173174
bitScale = 1.0F / (float) ((1 << queryBits) - 1);
174-
175175
}
176176

177177
@Override

0 commit comments

Comments
 (0)