Skip to content

Commit c7c5d4a

Browse files
benwtrent and elasticsearchmachine authored
Add filtering for kNN vector indexer test scenarios (#130751)
* Add filtering for kNN vector indexer test scenarios
* [CI] Auto commit changes from spotless
---------
Co-authored-by: elasticsearchmachine <[email protected]>
1 parent 2f75277 commit c7c5d4a

File tree

3 files changed

+182
-25
lines changed

3 files changed

+182
-25
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ record CmdLineArgs(
4646
int indexThreads,
4747
boolean reindex,
4848
boolean forceMerge,
49+
float filterSelectivity,
50+
long seed,
4951
VectorSimilarityFunction vectorSpace,
5052
int quantizeBits,
5153
VectorEncoding vectorEncoding,
@@ -75,6 +77,8 @@ record CmdLineArgs(
7577
static final ParseField VECTOR_ENCODING_FIELD = new ParseField("vector_encoding");
7678
static final ParseField DIMENSIONS_FIELD = new ParseField("dimensions");
7779
static final ParseField EARLY_TERMINATION_FIELD = new ParseField("early_termination");
80+
static final ParseField FILTER_SELECTIVITY_FIELD = new ParseField("filter_selectivity");
81+
static final ParseField SEED_FIELD = new ParseField("seed");
7882

7983
static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
8084
Builder builder = PARSER.apply(parser, null);
@@ -106,6 +110,8 @@ static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
106110
PARSER.declareString(Builder::setVectorEncoding, VECTOR_ENCODING_FIELD);
107111
PARSER.declareInt(Builder::setDimensions, DIMENSIONS_FIELD);
108112
PARSER.declareBoolean(Builder::setEarlyTermination, EARLY_TERMINATION_FIELD);
113+
PARSER.declareFloat(Builder::setFilterSelectivity, FILTER_SELECTIVITY_FIELD);
114+
PARSER.declareLong(Builder::setSeed, SEED_FIELD);
109115
}
110116

111117
@Override
@@ -136,6 +142,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
136142
builder.field(QUANTIZE_BITS_FIELD.getPreferredName(), quantizeBits);
137143
builder.field(VECTOR_ENCODING_FIELD.getPreferredName(), vectorEncoding.name().toLowerCase(Locale.ROOT));
138144
builder.field(DIMENSIONS_FIELD.getPreferredName(), dimensions);
145+
builder.field(EARLY_TERMINATION_FIELD.getPreferredName(), earlyTermination);
146+
builder.field(FILTER_SELECTIVITY_FIELD.getPreferredName(), filterSelectivity);
147+
builder.field(SEED_FIELD.getPreferredName(), seed);
139148
return builder.endObject();
140149
}
141150

@@ -167,6 +176,8 @@ static class Builder {
167176
private VectorEncoding vectorEncoding = VectorEncoding.FLOAT32;
168177
private int dimensions;
169178
private boolean earlyTermination;
179+
private float filterSelectivity = 1f;
180+
private long seed = 1751900822751L;
170181

171182
public Builder setDocVectors(String docVectors) {
172183
this.docVectors = PathUtils.get(docVectors);
@@ -278,6 +289,16 @@ public Builder setEarlyTermination(Boolean patience) {
278289
return this;
279290
}
280291

292+
public Builder setFilterSelectivity(float filterSelectivity) {
293+
this.filterSelectivity = filterSelectivity;
294+
return this;
295+
}
296+
297+
public Builder setSeed(long seed) {
298+
this.seed = seed;
299+
return this;
300+
}
301+
281302
public CmdLineArgs build() {
282303
if (docVectors == null) {
283304
throw new IllegalArgumentException("Document vectors path must be provided");
@@ -305,6 +326,8 @@ public CmdLineArgs build() {
305326
indexThreads,
306327
reindex,
307328
forceMerge,
329+
filterSelectivity,
330+
seed,
308331
vectorSpace,
309332
quantizeBits,
310333
vectorEncoding,

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,10 +178,20 @@ public static void main(String[] args) throws Exception {
178178
? cmdLineArgs.nProbes()
179179
: new int[] { 0 };
180180
String indexType = cmdLineArgs.indexType().name().toLowerCase(Locale.ROOT);
181-
Results indexResults = new Results(cmdLineArgs.docVectors().getFileName().toString(), indexType, cmdLineArgs.numDocs());
181+
Results indexResults = new Results(
182+
cmdLineArgs.docVectors().getFileName().toString(),
183+
indexType,
184+
cmdLineArgs.numDocs(),
185+
cmdLineArgs.filterSelectivity()
186+
);
182187
Results[] results = new Results[nProbes.length];
183188
for (int i = 0; i < nProbes.length; i++) {
184-
results[i] = new Results(cmdLineArgs.docVectors().getFileName().toString(), indexType, cmdLineArgs.numDocs());
189+
results[i] = new Results(
190+
cmdLineArgs.docVectors().getFileName().toString(),
191+
indexType,
192+
cmdLineArgs.numDocs(),
193+
cmdLineArgs.filterSelectivity()
194+
);
185195
}
186196
logger.info("Running KNN index tester with arguments: " + cmdLineArgs);
187197
Codec codec = createCodec(cmdLineArgs);
@@ -244,7 +254,8 @@ public String toString() {
244254
"avg_cpu_count",
245255
"QPS",
246256
"recall",
247-
"visited" };
257+
"visited",
258+
"filter_selectivity" };
248259

249260
// Calculate appropriate column widths based on headers and data
250261

@@ -274,7 +285,8 @@ public String toString() {
274285
String.format(Locale.ROOT, "%.2f", queryResult.avgCpuCount),
275286
String.format(Locale.ROOT, "%.2f", queryResult.qps),
276287
String.format(Locale.ROOT, "%.2f", queryResult.avgRecall),
277-
String.format(Locale.ROOT, "%.2f", queryResult.averageVisited) };
288+
String.format(Locale.ROOT, "%.2f", queryResult.averageVisited),
289+
String.format(Locale.ROOT, "%.2f", queryResult.filterSelectivity), };
278290
}
279291

280292
printBlock(sb, searchHeaders, queryResultsArray);
@@ -339,6 +351,7 @@ private int[] calculateColumnWidths(String[] headers, String[]... data) {
339351
static class Results {
340352
final String indexType, indexName;
341353
final int numDocs;
354+
final float filterSelectivity;
342355
long indexTimeMS;
343356
long forceMergeTimeMS;
344357
int numSegments;
@@ -350,10 +363,11 @@ static class Results {
350363
double netCpuTimeMS;
351364
double avgCpuCount;
352365

353-
Results(String indexName, String indexType, int numDocs) {
366+
Results(String indexName, String indexType, int numDocs, float filterSelectivity) {
354367
this.indexName = indexName;
355368
this.indexType = indexType;
356369
this.numDocs = numDocs;
370+
this.filterSelectivity = filterSelectivity;
357371
}
358372
}
359373

0 commit comments

Comments
 (0)