diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/score.md b/docs/reference/query-languages/esql/_snippets/functions/description/score.md new file mode 100644 index 0000000000000..82426283b03a0 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/score.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Scores an expression. Only full text functions will be scored. Returns scores for all the resulting docs. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/score.md b/docs/reference/query-languages/esql/_snippets/functions/examples/score.md new file mode 100644 index 0000000000000..86691e4e941a8 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/score.md @@ -0,0 +1,11 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Example** + +```esql +FROM books METADATA _score +| WHERE match(title, "Return") AND match(author, "Tolkien") +| EVAL first_score = score(match(title, "Return")) +``` + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/score.md b/docs/reference/query-languages/esql/_snippets/functions/layout/score.md new file mode 100644 index 0000000000000..b2fa5e09baeac --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/score.md @@ -0,0 +1,27 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. 
+ +## `SCORE` [esql-score] +```{applies_to} +stack: development +serverless: preview +``` + +**Syntax** + +:::{image} ../../../images/functions/score.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/score.md +::: + +:::{include} ../description/score.md +::: + +:::{include} ../types/score.md +::: + +:::{include} ../examples/score.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/score.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/score.md new file mode 100644 index 0000000000000..511ced1094f91 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/score.md @@ -0,0 +1,7 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Parameters** + +`query` +: Boolean expression that contains full text function(s) to be scored. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/score.md b/docs/reference/query-languages/esql/_snippets/functions/types/score.md new file mode 100644 index 0000000000000..ab4532fd069a5 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/types/score.md @@ -0,0 +1,8 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. 
+ +**Supported types** + +| query | result | +| --- | --- | +| boolean | double | + diff --git a/docs/reference/query-languages/esql/images/functions/score.svg b/docs/reference/query-languages/esql/images/functions/score.svg new file mode 100644 index 0000000000000..9662976dd6db1 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/score.svg @@ -0,0 +1 @@ +SCORE(query) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/score.json b/docs/reference/query-languages/esql/kibana/definition/functions/score.json new file mode 100644 index 0000000000000..c9b5e22a02e4c --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/score.json @@ -0,0 +1,25 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "score", + "description" : "Scores an expression. Only full text functions will be scored. Returns scores for all the resulting docs.", + "signatures" : [ + { + "params" : [ + { + "name" : "query", + "type" : "boolean", + "optional" : false, + "description" : "Boolean expression that contains full text function(s) to be scored." + } + ], + "variadic" : false, + "returnType" : "double" + } + ], + "examples" : [ + "FROM books METADATA _score\n| WHERE match(title, \"Return\") AND match(author, \"Tolkien\")\n| EVAL first_score = score(match(title, \"Return\"))" + ], + "preview" : true, + "snapshot_only" : true +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/score.md b/docs/reference/query-languages/esql/kibana/docs/functions/score.md new file mode 100644 index 0000000000000..865a7b0758ba9 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/score.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### SCORE +Scores an expression. 
Only full text functions will be scored. Returns scores for all the resulting docs. + +```esql +FROM books METADATA _score +| WHERE match(title, "Return") AND match(author, "Tolkien") +| EVAL first_score = score(match(title, "Return")) +``` diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/score-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/score-function.csv-spec new file mode 100644 index 0000000000000..1a39418e9a28d --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/score-function.csv-spec @@ -0,0 +1,127 @@ +############################################### +# Tests for Score function +# + +scoreSingle +required_capability: metadata_score +required_capability: score_function +required_capability: match_function + +// tag::score-function[] +FROM books METADATA _score +| WHERE match(title, "Return") AND match(author, "Tolkien") +| EVAL first_score = score(match(title, "Return")) +// end::score-function[] +| KEEP book_no, title, _score, first_score +| SORT book_no +| LIMIT 5 +; + +// tag::score-single-result[] +book_no:keyword | title:text | _score:double | first_score:double +2714 | Return of the King Being the Third Part of The Lord of the Rings | 3.1309072971343994 | 1.9245924949645996 +7350 | Return of the Shadow | 4.8434343338012695 | 3.5432329177856445 +// end::score-single-result[] +; + +scoreSingleNoMetadata +required_capability: score_function +required_capability: match_function + +FROM books +| WHERE match(title, "Return") AND match(author, "Tolkien") +| EVAL first_score = score(match(title, "Return")) +| KEEP book_no, title, first_score +| SORT book_no +| LIMIT 5 +; + +book_no:keyword | title:text | first_score:double +2714 | Return of the King Being the Third Part of The Lord of the Rings | 1.9245924949645996 +7350 | Return of the Shadow | 3.5432329177856445 +; + +scoreAfterEval +required_capability: score_function +required_capability: metadata_score +required_capability: match_function 
+ +FROM books METADATA _score +| EVAL stars = to_long(ratings / 2.0) +| EVAL s1 = score(match(author, "William")) +| WHERE match(author, "Faulkner") +| SORT book_no +| KEEP book_no, author, stars, s1 +| limit 5; + +book_no:keyword | author:text | stars:long | s1:double +2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | 3 | 0.0 +2713 | William Faulkner | 2 | 1.9043500423431396 +2847 | Colleen Faulkner | 3 | 0.0 +2883 | William Faulkner | 2 | 1.9043500423431396 +3293 | Danny Faulkner | 2 | 0.0 +; + +scoreMatchWithFilterConjunction +required_capability: score_function +required_capability: match_function + +FROM books +| WHERE match(title, "Return") AND match(author, "Tolkien") +| EVAL s1 = score(match(title, "Rings") and ratings > 4.6) +| KEEP book_no, title, s1 +| SORT book_no +| LIMIT 5; + +book_no:keyword | title:text | s1:double +2714 | Return of the King Being the Third Part of The Lord of the Rings | 1.9245924949645996 +7350 | Return of the Shadow | 0.0 +; + +scoreMatchWithDisjunction +required_capability: score_function +required_capability: match_function + +FROM books +| WHERE match(title, "Return") AND match(author, "Tolkien") +| EVAL s1 = score(match(title, "Rings") or match(title, "Shadow")) +| KEEP book_no, title, s1 +| SORT book_no +| LIMIT 5; + +book_no:keyword | title:text | s1:double +2714 | Return of the King Being the Third Part of The Lord of the Rings | 1.9245924949645996 +7350 | Return of the Shadow | 3.5432329177856445 +; + +scoreMatchWithDisjunctionAndFilter +required_capability: score_function +required_capability: match_function + +FROM books +| WHERE match(title, "Return") AND match(author, "Tolkien") +| EVAL s1 = score(match(title, "Rings") or match(title, "Shadow") and ratings > 4.6) +| KEEP book_no, title, s1 +| SORT book_no +| LIMIT 5; + +book_no:keyword | title:text | s1:double +2714 | Return of the King Being the Third Part of The Lord of the Rings | 1.9245924949645996 +7350 | Return of the Shadow | 3.5432329177856445 +; + 
+scoreMatchDisjunctionNonPushable +required_capability: score_function +required_capability: match_function + +FROM books +| WHERE match(title, "Return") AND match(author, "Tolkien") +| EVAL s1 = score(match(title, "Rings") or ratings > 4.6) +| KEEP book_no, title, s1 +| SORT book_no +| LIMIT 5; + +book_no:keyword | title:text | s1:double +2714 | Return of the King Being the Third Part of The Lord of the Rings | 1.9245924949645996 +7350 | Return of the Shadow | 0.0 +; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ScoreFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ScoreFunctionIT.java new file mode 100644 index 0000000000000..6c60c0334eddd --- /dev/null +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ScoreFunctionIT.java @@ -0,0 +1,494 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.kql.KqlPlugin; +import org.junit.Before; + +import java.util.Collection; +import java.util.List; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.CoreMatchers.containsString; + +//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE,org.elasticsearch.compute:TRACE", reason = "debug") +public class ScoreFunctionIT extends AbstractEsqlIntegTestCase { + + @Before + public void setupIndex() { + assumeTrue("can run this only when score() function is enabled", EsqlCapabilities.Cap.SCORE_FUNCTION.isEnabled()); + createAndPopulateIndex(); + } + + public void testScoreSingleNoMetadata() { + var query = """ + FROM test + | WHERE match(content, "fox") AND match(content, "brown") + | EVAL first_score = score(match(content, "fox")) + | KEEP id, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(1, 1.156558871269226), List.of(6, 0.9114001989364624))); + } + } + + public void testScoreWithLimit() { + var query = """ + FROM test + | WHERE match(content, "fox") AND match(content, "brown") + | EVAL first_score = score(match(content, "fox")) + | KEEP id, first_score + | SORT id + | LIMIT 1 + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "first_score")); + 
assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(1, 1.156558871269226))); + } + } + + public void testScoreAfterLimit() { + var query = """ + FROM test + | WHERE match(content, "fox") AND match(content, "brown") + | LIMIT 1 + | EVAL first_score = score(match(content, "fox")) + | KEEP id, first_score + | SORT id + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("[SCORE] function cannot be used after LIMIT")); + } + + public void testScoreQueryExpressions() { + var query = """ + FROM test METADATA _score + | WHERE match(content, "fox") AND match(content, "brown") + | EVAL first_score = score(match(content, CONCAT("brown ", " fox"))) + | KEEP id, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(1, 1.4274532794952393), List.of(6, 1.1248724460601807))); + } + } + + public void testDisjunctionWithFiltersNoMetadata() { + var query = """ + FROM test + | EVAL first_score = score((match(content, "fox") OR match(content, "brown")) AND id > 1) + | WHERE match(content, "fox") AND match(content, "brown") + | KEEP id, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(1, 1.4274532496929169), List.of(6, 1.1248724162578583))); + } + } + + public void testScoreDifferentWhereMatch() { + var query = """ + FROM test METADATA _score + | EVAL first_score = score(match(content, "brown")) + | WHERE match(content, "fox") + | KEEP id, _score, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", 
"first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double")); + assertValues( + resp.values(), + List.of(List.of(1, 1.156558871269226, 0.2708943784236908), List.of(6, 0.9114001989364624, 0.21347221732139587)) + ); + } + } + + public void testScoreDifferentWhereMatchNoMetadata() { + var query = """ + FROM test + | EVAL first_score = score(match(content, "brown")) + | WHERE match(content, "fox") + | KEEP id, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(1, 0.2708943784236908), List.of(6, 0.21347221732139587))); + } + } + + public void testScoreInWhereWithMatch() { + var query = """ + FROM test + | WHERE score(match(content, "brown")) + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("[SCORE] function can't be used in WHERE")); + } + + public void testScoreInWhereWithFilter() { + var query = """ + FROM test + | WHERE score(id > 0) + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("Condition expression needs to be boolean, found [DOUBLE]")); + } + + public void testScoreNonFullTextFunction() { + var query = """ + FROM test + | EVAL meaningless = score(abs(-0.1)) + | KEEP id, meaningless + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "meaningless")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues( + resp.values(), + List.of(List.of(1, 0.0), List.of(2, 0.0), List.of(3, 0.0), List.of(4, 0.0), List.of(5, 0.0), List.of(6, 0.0)) + ); + } + } + + public void testScoreMultipleWhereMatch() { + var query = """ + FROM test METADATA _score + | WHERE match(content, "brown") + | WHERE match(content, "fox") 
+ | EVAL first_score = score(match(content, "brown")) + | KEEP id, _score, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double")); + assertValues( + resp.values(), + List.of(List.of(1, 1.4274532794952393, 0.2708943784236908), List.of(6, 1.1248724460601807, 0.21347221732139587)) + ); + } + } + + public void testScoreMultipleWhereKqlMatch() { + var query = """ + FROM test METADATA _score + | WHERE kql("brown") + | WHERE match(content, "fox") + | EVAL first_score = score(kql("brown")) + | KEEP id, _score, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double")); + assertValues( + resp.values(), + List.of(List.of(1, 1.4274532794952393, 0.2708943784236908), List.of(6, 1.1248724460601807, 0.21347221732139587)) + ); + } + } + + public void testScoreMultipleWhereQstrMatch() { + var query = """ + FROM test METADATA _score + | WHERE qstr("brown") + | WHERE match(content, "fox") + | EVAL first_score = score(qstr("brown")) + | KEEP id, _score, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double")); + assertValues( + resp.values(), + List.of(List.of(1, 1.4274532794952393, 0.2708943784236908), List.of(6, 1.1248724460601807, 0.21347221732139587)) + ); + } + } + + public void testScoreSameWhereQstrAndMatch() { + var query = """ + FROM test METADATA _score + | WHERE qstr("brown") AND match(content, "fox") + | EVAL first_score = score(qstr("brown") AND match(content, "fox")) + | KEEP id, _score, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), 
List.of("id", "_score", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double")); + assertValues( + resp.values(), + List.of(List.of(1, 1.4274532794952393, 1.4274532496929169), List.of(6, 1.1248724460601807, 1.1248724162578583)) + ); + } + } + + public void testScoreSingleWhereQstrAndMatch() { + var query = """ + FROM test METADATA _score + | WHERE qstr("brown") AND match(content, "fox") + | EVAL first_score = score(qstr("brown")) + | KEEP id, _score, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double")); + assertValues( + resp.values(), + List.of(List.of(1, 1.4274532794952393, 0.2708943784236908), List.of(6, 1.1248724460601807, 0.21347221732139587)) + ); + } + } + + public void testScoreBothWhereQstrAndMatch() { + var query = """ + FROM test METADATA _score + | WHERE qstr("brown") AND match(content, "fox") + | EVAL first_score = score(qstr("brown")) + | EVAL second_score = score(match(content, "fox")) + | KEEP id, _score, first_score, second_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score", "second_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double", "double")); + assertValues( + resp.values(), + List.of( + List.of(1, 1.4274532794952393, 0.2708943784236908, 1.156558871269226), + List.of(6, 1.1248724460601807, 0.21347221732139587, 0.9114001989364624) + ) + ); + } + } + + public void testScoreSameWhereKqlAndMatch() { + var query = """ + FROM test METADATA _score + | WHERE kql("brown") AND match(content, "fox") + | EVAL first_score = score(kql("brown") AND match(content, "fox")) + | KEEP id, _score, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score")); + 
assertColumnTypes(resp.columns(), List.of("integer", "double", "double")); + assertValues( + resp.values(), + List.of(List.of(1, 1.4274532794952393, 1.4274532496929169), List.of(6, 1.1248724460601807, 1.1248724162578583)) + ); + } + } + + public void testScoreSingleWhereKqlAndMatch() { + var query = """ + FROM test METADATA _score + | WHERE kql("brown") AND match(content, "fox") + | EVAL first_score = score(kql("brown")) + | KEEP id, _score, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double")); + assertValues( + resp.values(), + List.of(List.of(1, 1.4274532794952393, 0.2708943784236908), List.of(6, 1.1248724460601807, 0.21347221732139587)) + ); + } + } + + public void testScoreBothWhereKqlAndMatch() { + var query = """ + FROM test METADATA _score + | WHERE kql("brown") AND match(content, "fox") + | EVAL first_score = score(kql("brown")) + | EVAL second_score = score(match(content, "fox")) + | KEEP id, _score, first_score, second_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score", "second_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double", "double")); + assertValues( + resp.values(), + List.of( + List.of(1, 1.4274532794952393, 0.2708943784236908, 1.156558871269226), + List.of(6, 1.1248724460601807, 0.21347221732139587, 0.9114001989364624) + ) + ); + } + } + + public void testScoreSameWhereQstrORMatch() { + var query = """ + FROM test METADATA _score + | WHERE qstr("brown") OR match(content, "fox") + | EVAL first_score = score(qstr("brown") OR match(content, "fox")) + | KEEP id, _score, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", 
"double")); + assertValues( + resp.values(), + List.of( + List.of(1, 1.4274532794952393, 1.4274532496929169), + List.of(2, 0.2708943784236908, 0.2708943784236908), + List.of(3, 0.2708943784236908, 0.2708943784236908), + List.of(4, 0.19301524758338928, 0.19301524758338928), + List.of(6, 1.1248724460601807, 1.1248724162578583) + ) + ); + } + } + + public void testScoreSingleWhereQstrORMatch() { + var query = """ + FROM test METADATA _score + | WHERE qstr("brown") OR match(content, "fox") + | EVAL first_score = score(qstr("brown")) + | KEEP id, _score, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double")); + assertValues( + resp.values(), + List.of( + List.of(1, 1.4274532794952393, 0.2708943784236908), + List.of(2, 0.2708943784236908, 0.2708943784236908), + List.of(3, 0.2708943784236908, 0.2708943784236908), + List.of(4, 0.19301524758338928, 0.19301524758338928), + List.of(6, 1.1248724460601807, 0.21347221732139587) + ) + ); + } + } + + public void testScoreBothWhereQstrORMatch() { + var query = """ + FROM test METADATA _score + | WHERE qstr("brown") OR match(content, "fox") + | EVAL first_score = score(qstr("brown")) + | EVAL second_score = score(match(content, "fox")) + | KEEP id, _score, first_score, second_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score", "second_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double", "double")); + assertValues( + resp.values(), + List.of( + List.of(1, 1.4274532794952393, 0.2708943784236908, 1.156558871269226), + List.of(2, 0.2708943784236908, 0.2708943784236908, 0.0), + List.of(3, 0.2708943784236908, 0.2708943784236908, 0.0), + List.of(4, 0.19301524758338928, 0.19301524758338928, 0.0), + List.of(6, 1.1248724460601807, 0.21347221732139587, 0.9114001989364624) 
+ ) + ); + } + } + + public void testSimpleScoreAlone() { + var query = """ + FROM test METADATA _score + | EVAL first_score = score(match(content, "brown")) + | KEEP id, _score, first_score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score", "first_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double")); + assertValues( + resp.values(), + List.of( + List.of(1, 0.0, 0.2708943784236908), + List.of(2, 0.0, 0.2708943784236908), + List.of(3, 0.0, 0.2708943784236908), + List.of(4, 0.0, 0.19301524758338928), + List.of(5, 0.0, 0.0), + List.of(6, 0.0, 0.21347221732139587) + ) + ); + } + } + + private void createAndPopulateIndex() { + var indexName = "test"; + var client = client().admin().indices(); + var CreateRequest = client.prepareCreate(indexName) + .setSettings(Settings.builder().put("index.number_of_shards", 1)) + .setMapping("id", "type=integer", "content", "type=text"); + assertAcked(CreateRequest); + client().prepareBulk() + .add(new IndexRequest(indexName).id("1").source("id", 1, "content", "This is a brown fox")) + .add(new IndexRequest(indexName).id("2").source("id", 2, "content", "This is a brown dog")) + .add(new IndexRequest(indexName).id("3").source("id", 3, "content", "This dog is really brown")) + .add(new IndexRequest(indexName).id("4").source("id", 4, "content", "The dog is brown but this document is very very long")) + .add(new IndexRequest(indexName).id("5").source("id", 5, "content", "There is also a white cat")) + .add(new IndexRequest(indexName).id("6").source("id", 6, "content", "The quick brown fox jumps over the lazy dog")) + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + .get(); + ensureYellow(indexName); + } + + @Override + protected Collection> nodePlugins() { + return CollectionUtils.appendToCopy(super.nodePlugins(), KqlPlugin.class); + } +} diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 0b51241beebbd..4759579b94d24 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1077,6 +1077,11 @@ public enum Cap { */ LAST_OVER_TIME(Build.current().isSnapshot()), + /** + * Support for the SCORE function + */ + SCORE_FUNCTION(Build.current().isSnapshot()), + /** * Support for the SAMPLE command */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 630c9c2008a13..fd7f853eec089 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -49,6 +49,7 @@ import org.elasticsearch.xpack.esql.expression.function.fulltext.MatchPhrase; import org.elasticsearch.xpack.esql.expression.function.fulltext.MultiMatch; import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString; +import org.elasticsearch.xpack.esql.expression.function.fulltext.Score; import org.elasticsearch.xpack.esql.expression.function.fulltext.Term; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; @@ -477,6 +478,7 @@ private static FunctionDefinition[][] snapshotFunctions() { def(AvgOverTime.class, uni(AvgOverTime::new), "avg_over_time"), def(LastOverTime.class, uni(LastOverTime::new), "last_over_time"), def(FirstOverTime.class, uni(FirstOverTime::new), "first_over_time"), + def(Score.class, uni(Score::new), 
Score.NAME), def(Term.class, bi(Term::new), "term"), def(Knn.class, Knn::new, "knn"), def(StGeohash.class, StGeohash::new, "st_geohash"), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index ec29b4b658c76..d499e29079963 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -16,6 +16,7 @@ import org.elasticsearch.compute.operator.ScoreOperator; import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.common.Failures; @@ -38,6 +39,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction; import org.elasticsearch.xpack.esql.expression.predicate.logical.BinaryLogic; import org.elasticsearch.xpack.esql.expression.predicate.logical.Not; +import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; @@ -50,6 +52,7 @@ import org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery; import org.elasticsearch.xpack.esql.score.ExpressionScoreMapper; +import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; @@ -195,6 +198,10 @@ private static void 
checkFullTextQueryFunctions(LogicalPlan plan, Failures failu if (plan instanceof Filter f) { Expression condition = f.condition(); + if (condition instanceof Score) { + failures.add(fail(condition, "[SCORE] function can't be used in WHERE")); + } + List.of(QueryString.class, Kql.class).forEach(functionClass -> { // Check for limitations of QSTR and KQL function. checkCommandsBeforeExpression( @@ -219,12 +226,38 @@ private static void checkFullTextQueryFunctions(LogicalPlan plan, Failures failu } else if (plan instanceof Aggregate agg) { checkFullTextFunctionsInAggs(agg, failures); } else { + List scoredFTFs = new ArrayList<>(); + plan.forEachExpression(Score.class, scoreFunction -> { + checkScoreFunction(plan, failures, scoreFunction); + plan.forEachExpression(FullTextFunction.class, scoredFTFs::add); + }); plan.forEachExpression(FullTextFunction.class, ftf -> { - failures.add(fail(ftf, "[{}] {} is only supported in WHERE and STATS commands", ftf.functionName(), ftf.functionType())); + if (scoredFTFs.remove(ftf) == false) { + failures.add( + fail( + ftf, + "[{}] {} is only supported in WHERE and STATS commands" + + (EsqlCapabilities.Cap.SCORE_FUNCTION.isEnabled() ? ", or in EVAL within score(.) 
function" : ""), + ftf.functionName(), + ftf.functionType() + ) + ); + } }); } } + private static void checkScoreFunction(LogicalPlan plan, Failures failures, Score scoreFunction) { + checkCommandsBeforeExpression( + plan, + scoreFunction.canonical(), + Score.class, + lp -> (lp instanceof Limit == false) && (lp instanceof Aggregate == false), + m -> "[" + m.functionName() + "] function", + failures + ); + } + private static void checkFullTextFunctionsInAggs(Aggregate agg, Failures failures) { agg.groupings().forEach(exp -> { exp.forEachDown(e -> { @@ -281,6 +314,7 @@ private static void checkFullTextFunctionsParents(Expression condition, Failures forEachFullTextFunctionParent(condition, (ftf, parent) -> { if ((parent instanceof FullTextFunction == false) && (parent instanceof BinaryLogic == false) + && (parent instanceof EsqlBinaryComparison == false) && (parent instanceof Not == false)) { failures.add( fail( diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java index 18c0a22589baa..657017a76b1db 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java @@ -28,6 +28,9 @@ public static List getNamedWriteables() { if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { entries.add(Term.ENTRY); } + if (EsqlCapabilities.Cap.SCORE_FUNCTION.isEnabled()) { + entries.add(Score.ENTRY); + } return Collections.unmodifiableList(entries); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Score.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Score.java new file mode 100644 index 
0000000000000..1b471931eaa0e
--- /dev/null
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Score.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.fulltext;
+
+import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.compute.operator.ScoreOperator;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.function.Function;
+import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
+import org.elasticsearch.xpack.esql.expression.function.Example;
+import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo;
+import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle;
+import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
+import org.elasticsearch.xpack.esql.expression.function.Param;
+import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
+import org.elasticsearch.xpack.esql.score.ScoreMapper;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A function to be used to score specific portions of an ES|QL query e.g., in conjunction with
+ * an {@link org.elasticsearch.xpack.esql.plan.logical.Eval}.
+ */
+public class Score extends Function implements EvaluatorMapper {
+
+    public static final String NAME = "score";
+
+    public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, NAME, Score::readFrom);
+
+    @FunctionInfo(
+        returnType = "double",
+        preview = true,
+        appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) },
+        description = "Scores an expression. Only full text functions will be scored. Returns scores for all the resulting docs.",
+        examples = { @Example(file = "score-function", tag = "score-function") }
+    )
+    public Score(
+        Source source,
+        @Param(
+            name = "query",
+            type = { "boolean" },
+            description = "Boolean expression that contains full text function(s) to be scored."
+        ) Expression scorableQuery
+    ) {
+        this(source, List.of(scorableQuery));
+    }
+
+    protected Score(Source source, List<Expression> children) {
+        super(source, children);
+    }
+
+    @Override
+    public DataType dataType() {
+        return DataType.DOUBLE;
+    }
+
+    @Override
+    public Expression replaceChildren(List<Expression> newChildren) {
+        return new Score(source(), newChildren);
+    }
+
+    @Override
+    protected NodeInfo<? extends Expression> info() {
+        return NodeInfo.create(this, Score::new, children().getFirst());
+    }
+
+    @Override
+    public String getWriteableName() {
+        return NAME;
+    }
+
+    @Override
+    public EvalOperator.ExpressionEvaluator.Factory toEvaluator(EvaluatorMapper.ToEvaluator toEvaluator) {
+        ScoreOperator.ExpressionScorer.Factory scorerFactory = ScoreMapper.toScorer(children().getFirst(), toEvaluator.shardContexts());
+        return new ScorerEvaluatorFactory(scorerFactory);
+    }
+
+    /**
+     * Writes the source followed by the children as a named-writeable collection; {@link #readFrom} reads the same layout.
+     */
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        source().writeTo(out);
+        out.writeNamedWriteableCollection(this.children());
+    }
+
+    /**
+     * Reads a {@link Score} written by {@link #writeTo}: the source first, then the collection of children.
+     */
+    private static Expression readFrom(StreamInput in) throws IOException {
+        Source source = Source.readFrom((PlanStreamInput) in);
+        // Mirror writeNamedWriteableCollection rather than reading a single optional element from the same bytes
+        List<Expression> children = in.readNamedWriteableCollectionAsList(Expression.class);
+        return new Score(source, children);
+    }
+
+    /**
+     * Adapts a {@link ScoreOperator.ExpressionScorer.Factory} to an {@link EvalOperator.ExpressionEvaluator.Factory}.
+     */
+    private record ScorerEvaluatorFactory(ScoreOperator.ExpressionScorer.Factory scoreFactory)
+        implements
+            EvalOperator.ExpressionEvaluator.Factory {
+
+        @Override
+        public EvalOperator.ExpressionEvaluator get(DriverContext context) {
+            return new EvalOperator.ExpressionEvaluator() {
+
+                private final ScoreOperator.ExpressionScorer scorer = scoreFactory.get(context);
+
+                @Override
+                public void close() {
+                    scorer.close();
+                }
+
+                @Override
+                public Block eval(Page page) {
+                    return scorer.score(page);
+                }
+            };
+        }
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        Score score = (Score) o;
+        return super.equals(o) && score.children().equals(children());
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(children());
+    }
+}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java
index ad6cb42f7f835..28204e2572842 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java
@@ -528,7 +528,7 @@ private PhysicalOperation planEval(EvalExec eval, LocalExecutionPlannerContext c
         PhysicalOperation source = plan(eval.child(), context);
 
         for (Alias field : eval.fields()) {
-            var evaluatorSupplier = EvalMapper.toEvaluator(context.foldCtx(), field.child(), source.layout);
+            var evaluatorSupplier = EvalMapper.toEvaluator(context.foldCtx(), field.child(), source.layout, context.shardContexts);
             Layout.Builder layout = source.layout.builder();
             layout.append(field.toAttribute());
             source = source.with(new EvalOperatorFactory(evaluatorSupplier), 
layout.build()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index bdf2ba39edc66..89b129a142505 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -277,6 +277,10 @@ public final void test() throws Throwable { "can't use match in csv tests", testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.MATCH_OPERATOR_COLON.capabilityName()) ); + assumeFalse( + "can't use score function in csv tests", + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.SCORE_FUNCTION.capabilityName()) + ); assumeFalse( "can't load metrics in csv tests", testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.METRICS_COMMAND.capabilityName()) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index fc38af5569b98..407360f0bf5f2 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1213,7 +1213,7 @@ public void testWeightedAvg() { public void testMatchInsideEval() throws Exception { assertEquals( - "1:36: [:] operator is only supported in WHERE and STATS commands\n" + "1:36: [:] operator is only supported in WHERE and STATS commands, or in EVAL within score(.) 
function\n" + "line 1:36: [:] operator cannot operate on [title], which is not a field from an index mapping", error("row title = \"brown fox\" | eval x = title:\"fox\" ") ); @@ -1373,17 +1373,25 @@ public void testFullTextFunctionsOnlyAllowedInWhere() throws Exception { if (EsqlCapabilities.Cap.KNN_FUNCTION_V2.isEnabled()) { checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [0, 1, 2], 10)", "function"); } + } private void checkFullTextFunctionsOnlyAllowedInWhere(String functionName, String functionInvocation, String functionType) throws Exception { assertThat( error("from test | eval y = " + functionInvocation, fullTextAnalyzer), - containsString("[" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands") + containsString( + "[" + + functionName + + "] " + + functionType + + " is only supported in WHERE and STATS commands, or in EVAL within score(.) function" + ) ); assertThat( error("from test | sort " + functionInvocation + " asc", fullTextAnalyzer), containsString("[" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands") + ); assertThat( error("from test | stats max_id = max(id) by " + functionInvocation, fullTextAnalyzer), diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/ScoreTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/ScoreTests.java new file mode 100644 index 0000000000000..346b1cafa02f4 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/ScoreTests.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.fulltext;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+import org.elasticsearch.xpack.esql.expression.function.FunctionName;
+import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
+import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.xpack.esql.SerializationTestUtils.serializeDeserialize;
+import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN;
+import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE;
+import static org.hamcrest.Matchers.equalTo;
+
+@FunctionName("score")
+public class ScoreTests extends AbstractMatchFullTextFunctionTests {
+
+    public ScoreTests(@Name("TestCase") Supplier<TestCaseSupplier.TestCase> testCaseSupplier) {
+        this.testCase = testCaseSupplier.get();
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() {
+        List<TestCaseSupplier> suppliers = new ArrayList<>();
+        suppliers.add(
+            new TestCaseSupplier(
+                List.of(BOOLEAN),
+                () -> new TestCaseSupplier.TestCase(
+                    List.of(new TestCaseSupplier.TypedData(randomBoolean(), BOOLEAN, "query")),
+                    equalTo("ScoreEvaluator" + ScoreTests.class.getSimpleName()),
+                    DOUBLE,
+                    equalTo(true)
+                )
+            )
+        );
+
+        return parameterSuppliersFromTypedData(suppliers);
+    }
+
+    @Override
+    protected Expression build(Source source, List<Expression> args) {
+        return new Score(source, args.getFirst());
+    }
+
+    /**
+     * Round-trips the expression through serialization and then puts the original children back on the result.
+     */
+    @Override
+    protected Expression serializeDeserializeExpression(Expression expression) {
+        Expression newExpression = serializeDeserialize(
+            expression,
+            PlanStreamOutput::writeNamedWriteable,
+            in -> in.readNamedWriteable(Expression.class),
+            testCase.getConfiguration() // The configuration query should be == to the source text of the function for this to work
+        );
+        // Fields use synthetic sources, which can't be serialized. So we use the originals instead.
+        return newExpression.replaceChildren(expression.children());
+    }
+}