Skip to content

Commit 4ab3041

Browse files
committed
Speed up (filtered) KNN queries for flat vector fields
For dense vector fields using the `flat` index, we already know a brute-force search will be used, so there is no need to go through the codec's approximate KNN logic. This change skips that step and builds the brute-force (exact) query directly, which is both faster and simpler.

I tested this on a setup with **10 million random vectors**, each with **1596 dimensions** and **17,500 partitions**, using the `random_vector` track. The results:

### Performance Comparison

| Metric            | Before    | After      | Change    |
| ----------------- | --------- | ---------- | --------- |
| **Throughput**    | 221 ops/s | 2762 ops/s | 🟢 +1149% |
| **Latency (p50)** | 29.2 ms   | 1.6 ms     | 🔻 -94.4% |
| **Latency (p99)** | 81.6 ms   | 3.5 ms     | 🔻 -95.7% |

Filtered KNN queries on flat vectors are now over 10x faster on my laptop!
1 parent b9360a4 commit 4ab3041

File tree

8 files changed

+366
-98
lines changed

8 files changed

+366
-98
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ TopDocs doVectorQuery(float[] vector, IndexSearcher searcher) throws IOException
309309
}
310310
if (overSamplingFactor > 1f) {
311311
// oversample the topK results to get more candidates for the final result
312-
knnQuery = new RescoreKnnVectorQuery(VECTOR_FIELD, vector, similarityFunction, this.topK, knnQuery);
312+
knnQuery = RescoreKnnVectorQuery.fromInnerQuery(VECTOR_FIELD, vector, similarityFunction, this.topK, topK, knnQuery);
313313
}
314314
QueryProfiler profiler = new QueryProfiler();
315315
TopDocs docs = searcher.search(knnQuery, this.topK);

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 105 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,13 @@
3030
import org.apache.lucene.index.SegmentWriteState;
3131
import org.apache.lucene.index.VectorEncoding;
3232
import org.apache.lucene.index.VectorSimilarityFunction;
33+
import org.apache.lucene.search.BooleanClause;
34+
import org.apache.lucene.search.BooleanQuery;
3335
import org.apache.lucene.search.FieldExistsQuery;
3436
import org.apache.lucene.search.Query;
3537
import org.apache.lucene.search.join.BitSetProducer;
38+
import org.apache.lucene.search.join.ScoreMode;
39+
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
3640
import org.apache.lucene.search.knn.KnnSearchStrategy;
3741
import org.apache.lucene.util.BitUtil;
3842
import org.apache.lucene.util.BytesRef;
@@ -1391,6 +1395,18 @@ public final boolean equals(Object other) {
13911395
public final int hashCode() {
13921396
return Objects.hash(type, doHashCode());
13931397
}
1398+
1399+
/**
1400+
* Indicates whether the underlying vector search is performed using a flat (exhaustive) approach.
1401+
* <p>
1402+
* When {@code true}, it means the search does not use any approximate nearest neighbor (ANN)
1403+
* acceleration structures such as HNSW or IVF. Instead, it performs a brute-force comparison
1404+
* against all candidate vectors. This information can be used by higher-level components
1405+
* to decide whether additional acceleration or optimization is necessary.
1406+
*
1407+
* @return {@code true} if the vector search is flat (exhaustive), {@code false} if it uses ANN structures
1408+
*/
1409+
abstract boolean isFlat();
13941410
}
13951411

13961412
abstract static class QuantizedIndexOptions extends DenseVectorIndexOptions {
@@ -1762,6 +1778,11 @@ int doHashCode() {
17621778
return Objects.hash(confidenceInterval, rescoreVector);
17631779
}
17641780

1781+
@Override
1782+
boolean isFlat() {
1783+
return true;
1784+
}
1785+
17651786
@Override
17661787
public boolean updatableTo(DenseVectorIndexOptions update) {
17671788
return update.type.equals(this.type)
@@ -1810,6 +1831,11 @@ public boolean doEquals(DenseVectorIndexOptions o) {
18101831
public int doHashCode() {
18111832
return Objects.hash(type);
18121833
}
1834+
1835+
@Override
1836+
boolean isFlat() {
1837+
return true;
1838+
}
18131839
}
18141840

18151841
public static class Int4HnswIndexOptions extends QuantizedIndexOptions {
@@ -1860,6 +1886,11 @@ public int doHashCode() {
18601886
return Objects.hash(m, efConstruction, confidenceInterval, rescoreVector);
18611887
}
18621888

1889+
@Override
1890+
boolean isFlat() {
1891+
return false;
1892+
}
1893+
18631894
@Override
18641895
public String toString() {
18651896
return "{type="
@@ -1931,6 +1962,11 @@ public int doHashCode() {
19311962
return Objects.hash(confidenceInterval, rescoreVector);
19321963
}
19331964

1965+
@Override
1966+
boolean isFlat() {
1967+
return true;
1968+
}
1969+
19341970
@Override
19351971
public String toString() {
19361972
return "{type=" + type + ", confidence_interval=" + confidenceInterval + ", rescore_vector=" + rescoreVector + "}";
@@ -1999,6 +2035,11 @@ public int doHashCode() {
19992035
return Objects.hash(m, efConstruction, confidenceInterval, rescoreVector);
20002036
}
20012037

2038+
@Override
2039+
boolean isFlat() {
2040+
return false;
2041+
}
2042+
20022043
@Override
20032044
public String toString() {
20042045
return "{type="
@@ -2088,6 +2129,11 @@ public int doHashCode() {
20882129
return Objects.hash(m, efConstruction);
20892130
}
20902131

2132+
@Override
2133+
boolean isFlat() {
2134+
return false;
2135+
}
2136+
20912137
@Override
20922138
public String toString() {
20932139
return "{type=" + type + ", m=" + m + ", ef_construction=" + efConstruction + "}";
@@ -2126,6 +2172,11 @@ int doHashCode() {
21262172
return Objects.hash(m, efConstruction, rescoreVector);
21272173
}
21282174

2175+
@Override
2176+
boolean isFlat() {
2177+
return false;
2178+
}
2179+
21292180
@Override
21302181
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
21312182
builder.startObject();
@@ -2179,6 +2230,11 @@ int doHashCode() {
21792230
return CLASS_NAME_HASH;
21802231
}
21812232

2233+
@Override
2234+
boolean isFlat() {
2235+
return true;
2236+
}
2237+
21822238
@Override
21832239
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
21842240
builder.startObject();
@@ -2237,6 +2293,11 @@ int doHashCode() {
22372293
return Objects.hash(clusterSize, defaultNProbe, rescoreVector);
22382294
}
22392295

2296+
@Override
2297+
boolean isFlat() {
2298+
return false;
2299+
}
2300+
22402301
@Override
22412302
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
22422303
builder.startObject();
@@ -2485,9 +2546,21 @@ private Query createKnnBitQuery(
24852546
KnnSearchStrategy searchStrategy
24862547
) {
24872548
elementType.checkDimensions(dims, queryVector.length);
2488-
Query knnQuery = parentFilter != null
2489-
? new ESDiversifyingChildrenByteKnnVectorQuery(name(), queryVector, filter, k, numCands, parentFilter, searchStrategy)
2490-
: new ESKnnByteVectorQuery(name(), queryVector, k, numCands, filter, searchStrategy);
2549+
Query knnQuery;
2550+
if (indexOptions.isFlat()) {
2551+
knnQuery = filter == null
2552+
? createExactKnnBitQuery(queryVector)
2553+
: new BooleanQuery.Builder().add(createExactKnnBitQuery(queryVector), BooleanClause.Occur.SHOULD)
2554+
.add(filter, BooleanClause.Occur.FILTER)
2555+
.build();
2556+
if (parentFilter != null) {
2557+
knnQuery = new ToParentBlockJoinQuery(knnQuery, parentFilter, ScoreMode.Max);
2558+
}
2559+
} else {
2560+
knnQuery = parentFilter != null
2561+
? new ESDiversifyingChildrenByteKnnVectorQuery(name(), queryVector, filter, k, numCands, parentFilter, searchStrategy)
2562+
: new ESKnnByteVectorQuery(name(), queryVector, k, numCands, filter, searchStrategy);
2563+
}
24912564
if (similarityThreshold != null) {
24922565
knnQuery = new VectorSimilarityQuery(
24932566
knnQuery,
@@ -2513,9 +2586,22 @@ private Query createKnnByteQuery(
25132586
float squaredMagnitude = VectorUtil.dotProduct(queryVector, queryVector);
25142587
elementType.checkVectorMagnitude(similarity, ElementType.errorByteElementsAppender(queryVector), squaredMagnitude);
25152588
}
2516-
Query knnQuery = parentFilter != null
2517-
? new ESDiversifyingChildrenByteKnnVectorQuery(name(), queryVector, filter, k, numCands, parentFilter, searchStrategy)
2518-
: new ESKnnByteVectorQuery(name(), queryVector, k, numCands, filter, searchStrategy);
2589+
2590+
Query knnQuery;
2591+
if (indexOptions.isFlat()) {
2592+
knnQuery = filter == null
2593+
? createExactKnnByteQuery(queryVector)
2594+
: new BooleanQuery.Builder().add(createExactKnnByteQuery(queryVector), BooleanClause.Occur.SHOULD)
2595+
.add(filter, BooleanClause.Occur.FILTER)
2596+
.build();
2597+
if (parentFilter != null) {
2598+
knnQuery = new ToParentBlockJoinQuery(knnQuery, parentFilter, ScoreMode.Max);
2599+
}
2600+
} else {
2601+
knnQuery = parentFilter != null
2602+
? new ESDiversifyingChildrenByteKnnVectorQuery(name(), queryVector, filter, k, numCands, parentFilter, searchStrategy)
2603+
: new ESKnnByteVectorQuery(name(), queryVector, k, numCands, filter, searchStrategy);
2604+
}
25192605
if (similarityThreshold != null) {
25202606
knnQuery = new VectorSimilarityQuery(
25212607
knnQuery,
@@ -2568,7 +2654,16 @@ && isNotUnitVector(squaredMagnitude)) {
25682654
numCands = Math.max(adjustedK, numCands);
25692655
}
25702656
Query knnQuery;
2571-
if (indexOptions instanceof BBQIVFIndexOptions bbqIndexOptions) {
2657+
if (indexOptions.isFlat()) {
2658+
knnQuery = filter == null
2659+
? createExactKnnFloatQuery(queryVector)
2660+
: new BooleanQuery.Builder().add(createExactKnnFloatQuery(queryVector), BooleanClause.Occur.SHOULD)
2661+
.add(filter, BooleanClause.Occur.FILTER)
2662+
.build();
2663+
if (parentFilter != null) {
2664+
knnQuery = new ToParentBlockJoinQuery(knnQuery, parentFilter, ScoreMode.Max);
2665+
}
2666+
} else if (indexOptions instanceof BBQIVFIndexOptions bbqIndexOptions) {
25722667
knnQuery = parentFilter != null
25732668
? new DiversifyingChildrenIVFKnnFloatVectorQuery(
25742669
name(),
@@ -2594,11 +2689,12 @@ && isNotUnitVector(squaredMagnitude)) {
25942689
: new ESKnnFloatVectorQuery(name(), queryVector, adjustedK, numCands, filter, knnSearchStrategy);
25952690
}
25962691
if (rescore) {
2597-
knnQuery = new RescoreKnnVectorQuery(
2692+
knnQuery = RescoreKnnVectorQuery.fromInnerQuery(
25982693
name(),
25992694
queryVector,
26002695
similarity.vectorSimilarityFunction(indexVersionCreated, ElementType.FLOAT),
26012696
k,
2697+
adjustedK,
26022698
knnQuery
26032699
);
26042700
}
@@ -2624,7 +2720,7 @@ ElementType getElementType() {
26242720
return elementType;
26252721
}
26262722

2627-
public IndexOptions getIndexOptions() {
2723+
public DenseVectorIndexOptions getIndexOptions() {
26282724
return indexOptions;
26292725
}
26302726

server/src/main/java/org/elasticsearch/search/vectors/DenseVectorQuery.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,4 +207,5 @@ public int docID() {
207207
return iterator.docID();
208208
}
209209
}
210+
210211
}

0 commit comments

Comments
 (0)