Skip to content
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
ee3c806
Make ScoreOperator and LuceneQueryEvaluator more robust
carlosdelest Aug 13, 2025
82779ec
Translate to exact NN when not pushable
carlosdelest Aug 13, 2025
0fb162a
KNN k is set via optimizer and limit
carlosdelest Aug 14, 2025
5694ae7
Fix KnnFunctionIT test
carlosdelest Aug 14, 2025
3e68d71
[CI] Auto commit changes from spotless
Aug 14, 2025
1b2829b
Fix CSV tests
carlosdelest Aug 14, 2025
e3dd487
Use min_candidates
carlosdelest Aug 14, 2025
38cfe1d
Bump capability
carlosdelest Aug 14, 2025
e4ef60f
Merge remote-tracking branch 'carlosdelest/non-issue/esql-knn-exact-s…
carlosdelest Aug 14, 2025
d92a5dd
[CI] Auto commit changes from spotless
Aug 14, 2025
66e3dcb
Fix tests
carlosdelest Aug 28, 2025
71d3a48
Fix min_candidates handling
carlosdelest Aug 28, 2025
eb47c7b
Fix tests
carlosdelest Aug 28, 2025
3daf953
Add tests
carlosdelest Aug 28, 2025
5e2d026
Merge remote-tracking branch 'origin/main' into non-issue/esql-knn-ex…
carlosdelest Aug 28, 2025
5d1f694
Merge remote-tracking branch 'carlosdelest/non-issue/esql-knn-exact-s…
carlosdelest Aug 28, 2025
db5c018
Fix tests
carlosdelest Aug 28, 2025
952a7c9
Spotless
carlosdelest Aug 28, 2025
f499025
Fix tests
carlosdelest Aug 28, 2025
09e02da
Fix generated docs
carlosdelest Aug 28, 2025
69e7731
Merge branch 'main' into non-issue/esql-knn-exact-search-non-pushed
carlosdelest Aug 28, 2025
52f75f1
Merge remote-tracking branch 'origin/main' into non-issue/esql-knn-ex…
carlosdelest Aug 28, 2025
6cbf31a
Add docs and fix equals / hashCode
carlosdelest Aug 28, 2025
932a7c7
Merge remote-tracking branch 'carlosdelest/non-issue/esql-knn-exact-s…
carlosdelest Aug 28, 2025
e55ff48
equals / hashCode were good as they were
carlosdelest Aug 28, 2025
9fa0fd6
Fix tests
carlosdelest Aug 28, 2025
21ff308
Merge branch 'main' into non-issue/esql-knn-exact-search-non-pushed
carlosdelest Aug 29, 2025
a921df5
Fix docs
carlosdelest Aug 29, 2025
de45fb0
Merge remote-tracking branch 'carlosdelest/non-issue/esql-knn-exact-s…
carlosdelest Aug 29, 2025
2173a0c
Merge branch 'main' into non-issue/esql-knn-exact-search-non-pushed
carlosdelest Aug 29, 2025
9d3c85f
Verify that knn has a limit
carlosdelest Sep 1, 2025
e95a5bc
Fix tests
carlosdelest Sep 1, 2025
a2cdfbc
Merge remote-tracking branch 'origin/main' into non-issue/esql-knn-ex…
carlosdelest Sep 1, 2025
d11d074
Spotless
carlosdelest Sep 1, 2025
09d365c
Merge remote-tracking branch 'carlosdelest/non-issue/esql-knn-exact-s…
carlosdelest Sep 1, 2025
af3296c
Add CSV tests for stats / rerank
carlosdelest Sep 1, 2025
e9ed8e5
Fix CSV test
carlosdelest Sep 1, 2025
9fe7f90
Fix rerank test
carlosdelest Sep 1, 2025
8fe374d
Improve rerank test
carlosdelest Sep 1, 2025
c4f3da7
Add test for multiple limits combination
carlosdelest Sep 3, 2025
c19a43a
Merge branch 'main' into non-issue/esql-knn-exact-search-non-pushed
carlosdelest Sep 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,16 @@ protected LuceneQueryEvaluator(BlockFactory blockFactory, ShardConfig[] shards)
}

public Block executeQuery(Page page) {
// Lucene based operators retrieve DocVectors as first block
Block block = page.getBlock(0);
assert block instanceof DocBlock : "LuceneQueryExpressionEvaluator expects DocBlock as input";
DocVector docs = (DocVector) block.asVector();
// Search for DocVector block
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor unrelated change: looks up the DocBlock by scanning the page's blocks instead of assuming it is always the block at position 0.

Block docBlock = null;
for (int i = 0; i < page.getBlockCount(); i++) {
if (page.getBlock(i) instanceof DocBlock) {
docBlock = page.getBlock(i);
break;
}
}
assert docBlock != null : "LuceneQueryExpressionEvaluator expects a DocBlock";
DocVector docs = (DocVector) docBlock.asVector();
try {
if (docs.singleSegmentNonDecreasing()) {
return evalSingleSegmentNonDecreasing(docs);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.DocVector;
import org.elasticsearch.compute.data.DoubleBlock;
import org.elasticsearch.compute.data.DoubleVector;
import org.elasticsearch.compute.data.Page;
Expand Down Expand Up @@ -46,9 +45,9 @@ public ScoreOperator(BlockFactory blockFactory, ExpressionScorer scorer, int sco

@Override
protected Page process(Page page) {
assert page.getBlockCount() >= 2 : "Expected at least 2 blocks, got " + page.getBlockCount();
assert page.getBlock(0).asVector() instanceof DocVector : "Expected a DocVector, got " + page.getBlock(0).asVector();
assert page.getBlock(1).asVector() instanceof DoubleVector : "Expected a DoubleVector, got " + page.getBlock(1).asVector();
assert page.getBlockCount() > scoreBlockPosition : "Expected to get a score block in position " + scoreBlockPosition;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor unrelated change: removes unnecessary assertions and checks the score block at scoreBlockPosition instead of a hardcoded position.

assert page.getBlock(scoreBlockPosition).asVector() instanceof DoubleVector
: "Expected a DoubleVector as a score block, got " + page.getBlock(scoreBlockPosition).asVector();

Block[] blocks = new Block[page.getBlockCount()];
for (int i = 0; i < page.getBlockCount(); i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
# top-n query at the shard level

knnSearch
required_capability: knn_function_v3
required_capability: knn_function_v4

// tag::knn-function[]
from colors metadata _score
| where knn(rgb_vector, [0, 120, 0], 10)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the k parameter from the knn calls in the tests.

| where knn(rgb_vector, [0, 120, 0])
| sort _score desc, color asc
// end::knn-function[]
| keep color, rgb_vector
Expand All @@ -30,10 +30,10 @@ chartreuse | [127.0, 255.0, 0.0]
;

knnSearchWithSimilarityOption
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors metadata _score
| where knn(rgb_vector, [255,192,203], 140, {"similarity": 40})
| where knn(rgb_vector, [255,192,203], {"similarity": 40})
| sort _score desc, color asc
| keep color, rgb_vector
;
Expand All @@ -46,13 +46,14 @@ wheat | [245.0, 222.0, 179.0]
;

knnHybridSearch
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors metadata _score
| where match(color, "blue") or knn(rgb_vector, [65,105,225], 10)
| where match(color, "blue") or knn(rgb_vector, [65,105,225])
| where primary == true
| sort _score desc, color asc
| keep color, rgb_vector
| limit 10
;

color:text | rgb_vector:dense_vector
Expand All @@ -68,10 +69,10 @@ yellow | [255.0, 255.0, 0.0]
;

knnWithPrefilter
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors
| where knn(rgb_vector, [120,180,0], 10) and (match(color, "olive") or match(color, "green"))
| where knn(rgb_vector, [120,180,0]) and (match(color, "olive") or match(color, "green"))
| sort color asc
| keep color
;
Expand All @@ -82,10 +83,10 @@ olive
;

knnWithNegatedPrefilter
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors metadata _score
| where knn(rgb_vector, [128,128,0], 10) and not (match(color, "olive") or match(color, "chocolate"))
| where knn(rgb_vector, [128,128,0]) and not (match(color, "olive") or match(color, "chocolate"))
| sort _score desc, color asc
| keep color, rgb_vector
| LIMIT 10
Expand All @@ -105,11 +106,11 @@ orange | [255.0, 165.0, 0.0]
;

knnAfterKeep
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors metadata _score
| keep rgb_vector, color, _score
| where knn(rgb_vector, [128,255,0], 140)
| where knn(rgb_vector, [128,255,0])
| sort _score desc, color asc
| keep rgb_vector
| limit 5
Expand All @@ -124,11 +125,11 @@ rgb_vector:dense_vector
;

knnAfterDrop
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors metadata _score
| drop primary
| where knn(rgb_vector, [128,250,0], 140)
| where knn(rgb_vector, [128,250,0])
| sort _score desc, color asc
| keep color, rgb_vector
| limit 5
Expand All @@ -143,11 +144,11 @@ lime | [0.0, 255.0, 0.0]
;

knnAfterEval
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors metadata _score
| eval composed_name = locate(color, " ") > 0
| where knn(rgb_vector, [128,128,0], 140)
| where knn(rgb_vector, [128,128,0])
| sort _score desc, color asc
| keep color, composed_name
| limit 5
Expand All @@ -162,12 +163,13 @@ golden rod | true
;

knnWithConjunction
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors metadata _score
| where knn(rgb_vector, [255,255,238], 10) and hex_code like "#FFF*"
| where knn(rgb_vector, [255,255,238]) and hex_code like "#FFF*"
| sort _score desc, color asc
| keep color, hex_code, rgb_vector
| limit 10
;

color:text | hex_code:keyword | rgb_vector:dense_vector
Expand All @@ -181,10 +183,10 @@ yellow | #FFFF00 | [255.0, 255.0, 0.0]
;

knnWithDisjunctionAndFiltersConjunction
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors metadata _score
| where (knn(rgb_vector, [0,255,255], 140) or knn(rgb_vector, [128, 0, 255], 10)) and primary == true
| where (knn(rgb_vector, [0,255,255]) or knn(rgb_vector, [128, 0, 255])) and primary == true
| keep color, rgb_vector, _score
| sort _score desc, color asc
| drop _score
Expand All @@ -204,10 +206,10 @@ yellow | [255.0, 255.0, 0.0]
;

knnWithNegationsAndFiltersConjunction
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors metadata _score
| where (knn(rgb_vector, [0,255,255], 140) and not(primary == true and match(color, "blue")))
| where (knn(rgb_vector, [0,255,255]) and not(primary == true and match(color, "blue")))
| sort _score desc, color asc
| keep color, rgb_vector
| limit 10
Expand All @@ -227,11 +229,11 @@ azure | [240.0, 255.0, 255.0]
;

knnWithNonPushableConjunction
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors metadata _score
| eval composed_name = locate(color, " ") > 0
| where knn(rgb_vector, [128,128,0], 140) and composed_name == false
| where knn(rgb_vector, [128,128,0], {"min_candidates": 100}) and composed_name == false
| sort _score desc, color asc
| keep color, composed_name
| limit 10
Expand All @@ -251,58 +253,88 @@ maroon | false
;

testKnnWithNonPushableDisjunctions
required_capability: knn_function_v3
required_capability: knn_function_v4

from colors metadata _score
| where knn(rgb_vector, [128,128,0], 140, {"similarity": 30}) or length(color) > 10
| where knn(rgb_vector, [128,128,0]) or length(color) > 10
| sort _score desc, color asc
| keep color
| keep color
| limit 10
;

color:text
olive
aqua marine
lemon chiffon
papaya whip
olive
sienna
chocolate
peru
golden rod
brown
firebrick
chartreuse
gray
green
;

testKnnWithNonPushableDisjunctionsOnComplexExpressions
required_capability: knn_function_v3
testKnnWithNonPushableDisjunctionsAndMinCandidates
required_capability: knn_function_v4

from colors metadata _score
| where (knn(rgb_vector, [128,128,0], 140, {"similarity": 70}) and length(color) < 10) or (knn(rgb_vector, [128,0,128], 140, {"similarity": 60}) and primary == false)
| where (knn(rgb_vector, [128,128,0], {"min_candidates": 2}) and length(color) > 10) or (knn(rgb_vector, [128,0,128], {"min_candidates": 2}) and primary == true)
| sort _score desc, color asc
| keep color, primary
;

color:text | primary:boolean
olive | false
purple | false
indigo | false
;
gray | true
green | true
red | true
black | true
magenta | true
yellow | true
blue | true
aqua marine | false
papaya whip | false
lemon chiffon | false
white | true
cyan | true
;

testKnnWithStats
required_capability: knn_function_v4

testKnnInStatsNonPushable
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

knn makes no sense in STATS - I will remove it in a later PR.

required_capability: knn_function_v3

from colors
| where length(color) < 10
| stats c = count(*) where knn(rgb_vector, [128,128,255], 140)
from colors metadata _score
| where knn(rgb_vector, [128,128,0])
| sort _score desc, color asc
| limit 15
| stats c = count(*)
;

c: long
50
c:long
15
;

testKnnInStatsWithGrouping
required_capability: knn_function_v3
required_capability: full_text_functions_in_stats_where
testKnnWithRerank
required_capability: knn_function_v4
required_capability: rerank

from colors
| where length(color) < 10
| stats c = count(*) where knn(rgb_vector, [128,128,255], 140) by primary
from colors metadata _score
| where knn(rgb_vector, [100,120,0])
| sort _score desc, color asc
| limit 10
| rerank rerank_score = "deepest blue" ON color WITH { "inference_id" : "test_reranker" }
| sort rerank_score desc, color asc
| keep color
;

c: long | primary: boolean
41 | false
9 | true
color:text
gray
peru
brown
green
olive
maroon
sienna
chocolate
firebrick
golden rod
;
Loading
Loading