Skip to content

Commit 44edbec

Browse files
jimczi authored and mridula-s109 committed
Speed up (filtered) KNN queries for flat vector fields (elastic#130251)
For dense vector fields using the `flat` index, we already know a brute-force search will be used—so there’s no need to go through the codec’s approximate KNN logic. This change skips that step and builds the brute-force query directly, making things faster and simpler.

I tested this on a setup with **10 million random vectors**, each with **1596 dimensions** and **17,500 partitions**, using the `random_vector` track. The results:

### Performance Comparison

| Metric            | Before    | After      | Change    |
| ----------------- | --------- | ---------- | --------- |
| **Throughput**    | 221 ops/s | 2762 ops/s | 🟢 +1149% |
| **Latency (p50)** | 29.2 ms   | 1.6 ms     | 🔻 -94.4% |
| **Latency (p99)** | 81.6 ms   | 3.5 ms     | 🔻 -95.7% |

Filtered KNN queries on flat vectors are now over 10x faster on my laptop!
1 parent 0db0804 commit 44edbec

File tree

11 files changed

+729
-97
lines changed

11 files changed

+729
-97
lines changed

docs/changelog/130251.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
pr: 130251
2+
summary: Speed up (filtered) KNN queries for flat vector fields
3+
area: Vector Search
4+
type: enhancement
5+
issues: []

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -310,7 +310,7 @@ TopDocs doVectorQuery(float[] vector, IndexSearcher searcher) throws IOException
310310
}
311311
if (overSamplingFactor > 1f) {
312312
// oversample the topK results to get more candidates for the final result
313-
knnQuery = new RescoreKnnVectorQuery(VECTOR_FIELD, vector, similarityFunction, this.topK, knnQuery);
313+
knnQuery = RescoreKnnVectorQuery.fromInnerQuery(VECTOR_FIELD, vector, similarityFunction, this.topK, topK, knnQuery);
314314
}
315315
QueryProfiler profiler = new QueryProfiler();
316316
TopDocs docs = searcher.search(knnQuery, this.topK);

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 104 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@
3030
import org.apache.lucene.index.SegmentWriteState;
3131
import org.apache.lucene.index.VectorEncoding;
3232
import org.apache.lucene.index.VectorSimilarityFunction;
33+
import org.apache.lucene.search.BooleanClause;
34+
import org.apache.lucene.search.BooleanQuery;
3335
import org.apache.lucene.search.FieldExistsQuery;
3436
import org.apache.lucene.search.Query;
3537
import org.apache.lucene.search.join.BitSetProducer;
@@ -77,6 +79,7 @@
7779
import org.elasticsearch.search.lookup.Source;
7880
import org.elasticsearch.search.vectors.DenseVectorQuery;
7981
import org.elasticsearch.search.vectors.DiversifyingChildrenIVFKnnFloatVectorQuery;
82+
import org.elasticsearch.search.vectors.DiversifyingParentBlockQuery;
8083
import org.elasticsearch.search.vectors.ESDiversifyingChildrenByteKnnVectorQuery;
8184
import org.elasticsearch.search.vectors.ESDiversifyingChildrenFloatKnnVectorQuery;
8285
import org.elasticsearch.search.vectors.ESKnnByteVectorQuery;
@@ -1391,6 +1394,18 @@ public final boolean equals(Object other) {
13911394
public final int hashCode() {
13921395
return Objects.hash(type, doHashCode());
13931396
}
1397+
1398+
/**
1399+
* Indicates whether the underlying vector search is performed using a flat (exhaustive) approach.
1400+
* <p>
1401+
* When {@code true}, it means the search does not use any approximate nearest neighbor (ANN)
1402+
* acceleration structures such as HNSW or IVF. Instead, it performs a brute-force comparison
1403+
* against all candidate vectors. This information can be used by higher-level components
1404+
* to decide whether additional acceleration or optimization is necessary.
1405+
*
1406+
* @return {@code true} if the vector search is flat (exhaustive), {@code false} if it uses ANN structures
1407+
*/
1408+
abstract boolean isFlat();
13941409
}
13951410

13961411
abstract static class QuantizedIndexOptions extends DenseVectorIndexOptions {
@@ -1762,6 +1777,11 @@ int doHashCode() {
17621777
return Objects.hash(confidenceInterval, rescoreVector);
17631778
}
17641779

1780+
@Override
1781+
boolean isFlat() {
1782+
return true;
1783+
}
1784+
17651785
@Override
17661786
public boolean updatableTo(DenseVectorIndexOptions update) {
17671787
return update.type.equals(this.type)
@@ -1810,6 +1830,11 @@ public boolean doEquals(DenseVectorIndexOptions o) {
18101830
public int doHashCode() {
18111831
return Objects.hash(type);
18121832
}
1833+
1834+
@Override
1835+
boolean isFlat() {
1836+
return true;
1837+
}
18131838
}
18141839

18151840
public static class Int4HnswIndexOptions extends QuantizedIndexOptions {
@@ -1860,6 +1885,11 @@ public int doHashCode() {
18601885
return Objects.hash(m, efConstruction, confidenceInterval, rescoreVector);
18611886
}
18621887

1888+
@Override
1889+
boolean isFlat() {
1890+
return false;
1891+
}
1892+
18631893
@Override
18641894
public String toString() {
18651895
return "{type="
@@ -1931,6 +1961,11 @@ public int doHashCode() {
19311961
return Objects.hash(confidenceInterval, rescoreVector);
19321962
}
19331963

1964+
@Override
1965+
boolean isFlat() {
1966+
return true;
1967+
}
1968+
19341969
@Override
19351970
public String toString() {
19361971
return "{type=" + type + ", confidence_interval=" + confidenceInterval + ", rescore_vector=" + rescoreVector + "}";
@@ -1999,6 +2034,11 @@ public int doHashCode() {
19992034
return Objects.hash(m, efConstruction, confidenceInterval, rescoreVector);
20002035
}
20012036

2037+
@Override
2038+
boolean isFlat() {
2039+
return false;
2040+
}
2041+
20022042
@Override
20032043
public String toString() {
20042044
return "{type="
@@ -2088,6 +2128,11 @@ public int doHashCode() {
20882128
return Objects.hash(m, efConstruction);
20892129
}
20902130

2131+
@Override
2132+
boolean isFlat() {
2133+
return false;
2134+
}
2135+
20912136
@Override
20922137
public String toString() {
20932138
return "{type=" + type + ", m=" + m + ", ef_construction=" + efConstruction + "}";
@@ -2126,6 +2171,11 @@ int doHashCode() {
21262171
return Objects.hash(m, efConstruction, rescoreVector);
21272172
}
21282173

2174+
@Override
2175+
boolean isFlat() {
2176+
return false;
2177+
}
2178+
21292179
@Override
21302180
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
21312181
builder.startObject();
@@ -2179,6 +2229,11 @@ int doHashCode() {
21792229
return CLASS_NAME_HASH;
21802230
}
21812231

2232+
@Override
2233+
boolean isFlat() {
2234+
return true;
2235+
}
2236+
21822237
@Override
21832238
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
21842239
builder.startObject();
@@ -2237,6 +2292,11 @@ int doHashCode() {
22372292
return Objects.hash(clusterSize, defaultNProbe, rescoreVector);
22382293
}
22392294

2295+
@Override
2296+
boolean isFlat() {
2297+
return false;
2298+
}
2299+
22402300
@Override
22412301
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
22422302
builder.startObject();
@@ -2485,9 +2545,21 @@ private Query createKnnBitQuery(
24852545
KnnSearchStrategy searchStrategy
24862546
) {
24872547
elementType.checkDimensions(dims, queryVector.length);
2488-
Query knnQuery = parentFilter != null
2489-
? new ESDiversifyingChildrenByteKnnVectorQuery(name(), queryVector, filter, k, numCands, parentFilter, searchStrategy)
2490-
: new ESKnnByteVectorQuery(name(), queryVector, k, numCands, filter, searchStrategy);
2548+
Query knnQuery;
2549+
if (indexOptions != null && indexOptions.isFlat()) {
2550+
var exactKnnQuery = parentFilter != null
2551+
? new DiversifyingParentBlockQuery(parentFilter, createExactKnnBitQuery(queryVector))
2552+
: createExactKnnBitQuery(queryVector);
2553+
knnQuery = filter == null
2554+
? exactKnnQuery
2555+
: new BooleanQuery.Builder().add(exactKnnQuery, BooleanClause.Occur.SHOULD)
2556+
.add(filter, BooleanClause.Occur.FILTER)
2557+
.build();
2558+
} else {
2559+
knnQuery = parentFilter != null
2560+
? new ESDiversifyingChildrenByteKnnVectorQuery(name(), queryVector, filter, k, numCands, parentFilter, searchStrategy)
2561+
: new ESKnnByteVectorQuery(name(), queryVector, k, numCands, filter, searchStrategy);
2562+
}
24912563
if (similarityThreshold != null) {
24922564
knnQuery = new VectorSimilarityQuery(
24932565
knnQuery,
@@ -2513,9 +2585,22 @@ private Query createKnnByteQuery(
25132585
float squaredMagnitude = VectorUtil.dotProduct(queryVector, queryVector);
25142586
elementType.checkVectorMagnitude(similarity, ElementType.errorByteElementsAppender(queryVector), squaredMagnitude);
25152587
}
2516-
Query knnQuery = parentFilter != null
2517-
? new ESDiversifyingChildrenByteKnnVectorQuery(name(), queryVector, filter, k, numCands, parentFilter, searchStrategy)
2518-
: new ESKnnByteVectorQuery(name(), queryVector, k, numCands, filter, searchStrategy);
2588+
2589+
Query knnQuery;
2590+
if (indexOptions != null && indexOptions.isFlat()) {
2591+
var exactKnnQuery = parentFilter != null
2592+
? new DiversifyingParentBlockQuery(parentFilter, createExactKnnByteQuery(queryVector))
2593+
: createExactKnnByteQuery(queryVector);
2594+
knnQuery = filter == null
2595+
? exactKnnQuery
2596+
: new BooleanQuery.Builder().add(exactKnnQuery, BooleanClause.Occur.SHOULD)
2597+
.add(filter, BooleanClause.Occur.FILTER)
2598+
.build();
2599+
} else {
2600+
knnQuery = parentFilter != null
2601+
? new ESDiversifyingChildrenByteKnnVectorQuery(name(), queryVector, filter, k, numCands, parentFilter, searchStrategy)
2602+
: new ESKnnByteVectorQuery(name(), queryVector, k, numCands, filter, searchStrategy);
2603+
}
25192604
if (similarityThreshold != null) {
25202605
knnQuery = new VectorSimilarityQuery(
25212606
knnQuery,
@@ -2568,7 +2653,16 @@ && isNotUnitVector(squaredMagnitude)) {
25682653
numCands = Math.max(adjustedK, numCands);
25692654
}
25702655
Query knnQuery;
2571-
if (indexOptions instanceof BBQIVFIndexOptions bbqIndexOptions) {
2656+
if (indexOptions != null && indexOptions.isFlat()) {
2657+
var exactKnnQuery = parentFilter != null
2658+
? new DiversifyingParentBlockQuery(parentFilter, createExactKnnFloatQuery(queryVector))
2659+
: createExactKnnFloatQuery(queryVector);
2660+
knnQuery = filter == null
2661+
? exactKnnQuery
2662+
: new BooleanQuery.Builder().add(exactKnnQuery, BooleanClause.Occur.SHOULD)
2663+
.add(filter, BooleanClause.Occur.FILTER)
2664+
.build();
2665+
} else if (indexOptions instanceof BBQIVFIndexOptions bbqIndexOptions) {
25722666
knnQuery = parentFilter != null
25732667
? new DiversifyingChildrenIVFKnnFloatVectorQuery(
25742668
name(),
@@ -2594,11 +2688,12 @@ && isNotUnitVector(squaredMagnitude)) {
25942688
: new ESKnnFloatVectorQuery(name(), queryVector, adjustedK, numCands, filter, knnSearchStrategy);
25952689
}
25962690
if (rescore) {
2597-
knnQuery = new RescoreKnnVectorQuery(
2691+
knnQuery = RescoreKnnVectorQuery.fromInnerQuery(
25982692
name(),
25992693
queryVector,
26002694
similarity.vectorSimilarityFunction(indexVersionCreated, ElementType.FLOAT),
26012695
k,
2696+
adjustedK,
26022697
knnQuery
26032698
);
26042699
}
@@ -2624,7 +2719,7 @@ ElementType getElementType() {
26242719
return elementType;
26252720
}
26262721

2627-
public IndexOptions getIndexOptions() {
2722+
public DenseVectorIndexOptions getIndexOptions() {
26282723
return indexOptions;
26292724
}
26302725

server/src/main/java/org/elasticsearch/search/vectors/DenseVectorQuery.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -207,4 +207,5 @@ public int docID() {
207207
return iterator.docID();
208208
}
209209
}
210+
210211
}

0 commit comments

Comments
 (0)