-
Notifications
You must be signed in to change notification settings - Fork 25.6k
ESQL - KNN function uses LIMIT for K, transforms to exact search when not pushed down #132944
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 39 commits
ee3c806
82779ec
0fb162a
5694ae7
3e68d71
1b2829b
e3dd487
38cfe1d
e4ef60f
d92a5dd
66e3dcb
71d3a48
eb47c7b
3daf953
5e2d026
5d1f694
db5c018
952a7c9
f499025
09e02da
69e7731
52f75f1
6cbf31a
932a7c7
e55ff48
9fa0fd6
21ff308
a921df5
de45fb0
2173a0c
9d3c85f
e95a5bc
a2cdfbc
d11d074
09d365c
af3296c
e9ed8e5
9fe7f90
8fe374d
c4f3da7
c19a43a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,7 +9,6 @@ | |
|
|
||
| import org.elasticsearch.compute.data.Block; | ||
| import org.elasticsearch.compute.data.BlockFactory; | ||
| import org.elasticsearch.compute.data.DocVector; | ||
| import org.elasticsearch.compute.data.DoubleBlock; | ||
| import org.elasticsearch.compute.data.DoubleVector; | ||
| import org.elasticsearch.compute.data.Page; | ||
|
|
@@ -46,9 +45,9 @@ public ScoreOperator(BlockFactory blockFactory, ExpressionScorer scorer, int sco | |
|
|
||
| @Override | ||
| protected Page process(Page page) { | ||
| assert page.getBlockCount() >= 2 : "Expected at least 2 blocks, got " + page.getBlockCount(); | ||
| assert page.getBlock(0).asVector() instanceof DocVector : "Expected a DocVector, got " + page.getBlock(0).asVector(); | ||
| assert page.getBlock(1).asVector() instanceof DoubleVector : "Expected a DoubleVector, got " + page.getBlock(1).asVector(); | ||
| assert page.getBlockCount() > scoreBlockPosition : "Expected to get a score block in position " + scoreBlockPosition; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Minor unrelated change - removes unnecessary assertions and uses a non-hardcoded position |
||
| assert page.getBlock(scoreBlockPosition).asVector() instanceof DoubleVector | ||
| : "Expected a DoubleVector as a score block, got " + page.getBlock(scoreBlockPosition).asVector(); | ||
|
|
||
| Block[] blocks = new Block[page.getBlockCount()]; | ||
| for (int i = 0; i < page.getBlockCount(); i++) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,11 +3,11 @@ | |
| # top-n query at the shard level | ||
|
|
||
| knnSearch | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| // tag::knn-function[] | ||
| from colors metadata _score | ||
| | where knn(rgb_vector, [0, 120, 0], 10) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed the k parameter from the tests |
||
| | where knn(rgb_vector, [0, 120, 0]) | ||
| | sort _score desc, color asc | ||
| // end::knn-function[] | ||
| | keep color, rgb_vector | ||
|
|
@@ -30,10 +30,10 @@ chartreuse | [127.0, 255.0, 0.0] | |
| ; | ||
|
|
||
| knnSearchWithSimilarityOption | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | where knn(rgb_vector, [255,192,203], 140, {"similarity": 40}) | ||
| | where knn(rgb_vector, [255,192,203], {"similarity": 40}) | ||
| | sort _score desc, color asc | ||
| | keep color, rgb_vector | ||
| ; | ||
|
|
@@ -46,13 +46,14 @@ wheat | [245.0, 222.0, 179.0] | |
| ; | ||
|
|
||
| knnHybridSearch | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | where match(color, "blue") or knn(rgb_vector, [65,105,225], 10) | ||
| | where match(color, "blue") or knn(rgb_vector, [65,105,225]) | ||
| | where primary == true | ||
| | sort _score desc, color asc | ||
| | keep color, rgb_vector | ||
| | limit 10 | ||
| ; | ||
|
|
||
| color:text | rgb_vector:dense_vector | ||
|
|
@@ -68,10 +69,10 @@ yellow | [255.0, 255.0, 0.0] | |
| ; | ||
|
|
||
| knnWithPrefilter | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors | ||
| | where knn(rgb_vector, [120,180,0], 10) and (match(color, "olive") or match(color, "green")) | ||
| | where knn(rgb_vector, [120,180,0]) and (match(color, "olive") or match(color, "green")) | ||
| | sort color asc | ||
| | keep color | ||
| ; | ||
|
|
@@ -82,10 +83,10 @@ olive | |
| ; | ||
|
|
||
| knnWithNegatedPrefilter | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | where knn(rgb_vector, [128,128,0], 10) and not (match(color, "olive") or match(color, "chocolate")) | ||
| | where knn(rgb_vector, [128,128,0]) and not (match(color, "olive") or match(color, "chocolate")) | ||
| | sort _score desc, color asc | ||
| | keep color, rgb_vector | ||
| | LIMIT 10 | ||
|
|
@@ -105,11 +106,11 @@ orange | [255.0, 165.0, 0.0] | |
| ; | ||
|
|
||
| knnAfterKeep | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | keep rgb_vector, color, _score | ||
| | where knn(rgb_vector, [128,255,0], 140) | ||
| | where knn(rgb_vector, [128,255,0]) | ||
| | sort _score desc, color asc | ||
| | keep rgb_vector | ||
| | limit 5 | ||
|
|
@@ -124,11 +125,11 @@ rgb_vector:dense_vector | |
| ; | ||
|
|
||
| knnAfterDrop | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | drop primary | ||
| | where knn(rgb_vector, [128,250,0], 140) | ||
| | where knn(rgb_vector, [128,250,0]) | ||
| | sort _score desc, color asc | ||
| | keep color, rgb_vector | ||
| | limit 5 | ||
|
|
@@ -143,11 +144,11 @@ lime | [0.0, 255.0, 0.0] | |
| ; | ||
|
|
||
| knnAfterEval | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | eval composed_name = locate(color, " ") > 0 | ||
| | where knn(rgb_vector, [128,128,0], 140) | ||
| | where knn(rgb_vector, [128,128,0]) | ||
| | sort _score desc, color asc | ||
| | keep color, composed_name | ||
| | limit 5 | ||
|
|
@@ -162,12 +163,13 @@ golden rod | true | |
| ; | ||
|
|
||
| knnWithConjunction | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | where knn(rgb_vector, [255,255,238], 10) and hex_code like "#FFF*" | ||
| | where knn(rgb_vector, [255,255,238]) and hex_code like "#FFF*" | ||
| | sort _score desc, color asc | ||
| | keep color, hex_code, rgb_vector | ||
| | limit 10 | ||
| ; | ||
|
|
||
| color:text | hex_code:keyword | rgb_vector:dense_vector | ||
|
|
@@ -181,10 +183,10 @@ yellow | #FFFF00 | [255.0, 255.0, 0.0] | |
| ; | ||
|
|
||
| knnWithDisjunctionAndFiltersConjunction | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | where (knn(rgb_vector, [0,255,255], 140) or knn(rgb_vector, [128, 0, 255], 10)) and primary == true | ||
| | where (knn(rgb_vector, [0,255,255]) or knn(rgb_vector, [128, 0, 255])) and primary == true | ||
| | keep color, rgb_vector, _score | ||
| | sort _score desc, color asc | ||
| | drop _score | ||
|
|
@@ -204,10 +206,10 @@ yellow | [255.0, 255.0, 0.0] | |
| ; | ||
|
|
||
| knnWithNegationsAndFiltersConjunction | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | where (knn(rgb_vector, [0,255,255], 140) and not(primary == true and match(color, "blue"))) | ||
| | where (knn(rgb_vector, [0,255,255]) and not(primary == true and match(color, "blue"))) | ||
| | sort _score desc, color asc | ||
| | keep color, rgb_vector | ||
| | limit 10 | ||
|
|
@@ -227,11 +229,11 @@ azure | [240.0, 255.0, 255.0] | |
| ; | ||
|
|
||
| knnWithNonPushableConjunction | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | eval composed_name = locate(color, " ") > 0 | ||
| | where knn(rgb_vector, [128,128,0], 140) and composed_name == false | ||
| | where knn(rgb_vector, [128,128,0], {"min_candidates": 100}) and composed_name == false | ||
| | sort _score desc, color asc | ||
| | keep color, composed_name | ||
| | limit 10 | ||
|
|
@@ -251,58 +253,88 @@ maroon | false | |
| ; | ||
|
|
||
| testKnnWithNonPushableDisjunctions | ||
| required_capability: knn_function_v3 | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | where knn(rgb_vector, [128,128,0], 140, {"similarity": 30}) or length(color) > 10 | ||
| | where knn(rgb_vector, [128,128,0]) or length(color) > 10 | ||
| | sort _score desc, color asc | ||
| | keep color | ||
| | keep color | ||
| | limit 10 | ||
| ; | ||
|
|
||
| color:text | ||
| olive | ||
| aqua marine | ||
| lemon chiffon | ||
| papaya whip | ||
| olive | ||
| sienna | ||
| chocolate | ||
| peru | ||
| golden rod | ||
| brown | ||
| firebrick | ||
| chartreuse | ||
| gray | ||
| green | ||
| ; | ||
|
|
||
| testKnnWithNonPushableDisjunctionsOnComplexExpressions | ||
| required_capability: knn_function_v3 | ||
| testKnnWithNonPushableDisjunctionsAndMinCandidates | ||
| required_capability: knn_function_v4 | ||
|
|
||
| from colors metadata _score | ||
| | where (knn(rgb_vector, [128,128,0], 140, {"similarity": 70}) and length(color) < 10) or (knn(rgb_vector, [128,0,128], 140, {"similarity": 60}) and primary == false) | ||
| | where (knn(rgb_vector, [128,128,0], {"min_candidates": 2}) and length(color) > 10) or (knn(rgb_vector, [128,0,128], {"min_candidates": 2}) and primary == true) | ||
| | sort _score desc, color asc | ||
| | keep color, primary | ||
| ; | ||
|
|
||
| color:text | primary:boolean | ||
| olive | false | ||
| purple | false | ||
| indigo | false | ||
| ; | ||
| gray | true | ||
| green | true | ||
| red | true | ||
| black | true | ||
| magenta | true | ||
| yellow | true | ||
| blue | true | ||
| aqua marine | false | ||
| papaya whip | false | ||
| lemon chiffon | false | ||
| white | true | ||
| cyan | true | ||
| ; | ||
|
|
||
| testKnnWithStats | ||
| required_capability: knn_function_v4 | ||
|
|
||
| testKnnInStatsNonPushable | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. knn makes no sense in stats - I will remove it in a later PR |
||
| required_capability: knn_function_v3 | ||
|
|
||
| from colors | ||
| | where length(color) < 10 | ||
| | stats c = count(*) where knn(rgb_vector, [128,128,255], 140) | ||
| from colors metadata _score | ||
| | where knn(rgb_vector, [128,128,0]) | ||
| | sort _score desc, color asc | ||
| | limit 15 | ||
| | stats c = count(*) | ||
| ; | ||
|
|
||
| c: long | ||
| 50 | ||
| c:long | ||
| 15 | ||
| ; | ||
|
|
||
| testKnnInStatsWithGrouping | ||
| required_capability: knn_function_v3 | ||
| required_capability: full_text_functions_in_stats_where | ||
| testKnnWithRerank | ||
| required_capability: knn_function_v4 | ||
| required_capability: rerank | ||
|
|
||
| from colors | ||
| | where length(color) < 10 | ||
| | stats c = count(*) where knn(rgb_vector, [128,128,255], 140) by primary | ||
| from colors metadata _score | ||
| | where knn(rgb_vector, [100,120,0]) | ||
| | sort _score desc, color asc | ||
| | limit 10 | ||
| | rerank rerank_score = "deepest blue" ON color WITH { "inference_id" : "test_reranker" } | ||
| | sort rerank_score desc, color asc | ||
| | keep color | ||
| ; | ||
|
|
||
| c: long | primary: boolean | ||
| 41 | false | ||
| 9 | true | ||
| color:text | ||
| gray | ||
| peru | ||
| brown | ||
| green | ||
| olive | ||
| maroon | ||
| sienna | ||
| chocolate | ||
| firebrick | ||
| golden rod | ||
| ; | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor unrelated change - Retrieves the doc block instead of hardcoding it