diff --git a/docs/changelog/127661.yaml b/docs/changelog/127661.yaml new file mode 100644 index 0000000000000..66c8336963271 --- /dev/null +++ b/docs/changelog/127661.yaml @@ -0,0 +1,5 @@ +pr: 127661 +summary: Add MATCH_PHRASE +area: ES|QL +type: enhancement +issues: [] diff --git a/docs/changelog/128925.yaml b/docs/changelog/128925.yaml new file mode 100644 index 0000000000000..dd41f7366ddd1 --- /dev/null +++ b/docs/changelog/128925.yaml @@ -0,0 +1,5 @@ +pr: 128925 +summary: ES|QL - Add `match_phrase` full text function (tech preview) +area: ES|QL +type: enhancement +issues: [] diff --git a/docs/reference/esql/functions/description/match_phrase.asciidoc b/docs/reference/esql/functions/description/match_phrase.asciidoc new file mode 100644 index 0000000000000..673ecdbfc9595 --- /dev/null +++ b/docs/reference/esql/functions/description/match_phrase.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Use `MATCH_PHRASE` to perform a [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) on the specified field. Using `MATCH_PHRASE` is equivalent to using the `match_phrase` query in the Elasticsearch Query DSL. MatchPhrase can be used on <<text, text>> fields, as well as other field types like keyword, boolean, or date types. MatchPhrase is not supported for <<semantic-text, semantic_text>> or numeric types. MatchPhrase can use <<esql-function-named-params,function named parameters>> to specify additional options for the match_phrase query. All [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) query parameters are supported. `MATCH_PHRASE` returns true if the provided query matches the row. 
diff --git a/docs/reference/esql/functions/examples/match_phrase.asciidoc b/docs/reference/esql/functions/examples/match_phrase.asciidoc new file mode 100644 index 0000000000000..fe148c33eefba --- /dev/null +++ b/docs/reference/esql/functions/examples/match_phrase.asciidoc @@ -0,0 +1,13 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Example* + +[source.merge.styled,esql] +---- +include::{esql-specs}/match-phrase-function.csv-spec[tag=match-phrase-with-field] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/match-phrase-function.csv-spec[tag=match-phrase-with-field-result] +|=== + diff --git a/docs/reference/esql/functions/functionNamedParams/match_phrase.asciidoc b/docs/reference/esql/functions/functionNamedParams/match_phrase.asciidoc new file mode 100644 index 0000000000000..7fc0c1b61aac4 --- /dev/null +++ b/docs/reference/esql/functions/functionNamedParams/match_phrase.asciidoc @@ -0,0 +1,12 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported function named parameters* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +name | types | description +zero_terms_query | [keyword] | Indicates whether all documents or none are returned if the analyzer removes all tokens, such as when using a stop filter. Defaults to none. +boost | [float] | Floating point number used to decrease or increase the relevance scores of the query. Defaults to 1.0. +analyzer | [keyword] | Analyzer used to convert the text in the query value into token. Defaults to the index-time analyzer mapped for the field. If no analyzer is mapped, the index’s default analyzer is used. +slop | [integer] | Maximum number of positions allowed between matching tokens. Defaults to 0. Transposed terms have a slop of 2. 
+|=== diff --git a/docs/reference/esql/functions/kibana/definition/match_phrase.json b/docs/reference/esql/functions/kibana/definition/match_phrase.json new file mode 100644 index 0000000000000..3796d7e21ec4e --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/match_phrase.json @@ -0,0 +1,63 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "eval", + "name" : "match_phrase", + "description" : "Use `MATCH_PHRASE` to perform a [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) on the\nspecified field.\nUsing `MATCH_PHRASE` is equivalent to using the `match_phrase` query in the Elasticsearch Query DSL.\n\nMatchPhrase can be used on <> fields, as well as other field types like keyword, boolean, or date types.\nMatchPhrase is not supported for <> or numeric types.\n\nMatchPhrase can use <> to specify additional options for the\nmatch_phrase query.\nAll [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) query parameters are supported.\n\n`MATCH_PHRASE` returns true if the provided query matches the row.", + "signatures" : [ + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "Field that the query will target." + }, + { + "name" : "query", + "type" : "keyword", + "optional" : false, + "description" : "Value to find in the provided field." + }, + { + "name" : "options", + "type" : "function_named_parameters", + "mapParams" : "{name='zero_terms_query', values=[none, all], description='Indicates whether all documents or none are returned if the analyzer removes all tokens, such as when using a stop filter. Defaults to none.'}, {name='boost', values=[2.5], description='Floating point number used to decrease or increase the relevance scores of the query. 
Defaults to 1.0.'}, {name='analyzer', values=[standard], description='Analyzer used to convert the text in the query value into token. Defaults to the index-time analyzer mapped for the field. If no analyzer is mapped, the index’s default analyzer is used.'}, {name='slop', values=[1], description='Maximum number of positions allowed between matching tokens. Defaults to 0. Transposed terms have a slop of 2.'}", + "optional" : true, + "description" : "(Optional) MatchPhrase additional options as <>. See [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) for more information." + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "Field that the query will target." + }, + { + "name" : "query", + "type" : "keyword", + "optional" : false, + "description" : "Value to find in the provided field." + }, + { + "name" : "options", + "type" : "function_named_parameters", + "mapParams" : "{name='zero_terms_query', values=[none, all], description='Indicates whether all documents or none are returned if the analyzer removes all tokens, such as when using a stop filter. Defaults to none.'}, {name='boost', values=[2.5], description='Floating point number used to decrease or increase the relevance scores of the query. Defaults to 1.0.'}, {name='analyzer', values=[standard], description='Analyzer used to convert the text in the query value into token. Defaults to the index-time analyzer mapped for the field. If no analyzer is mapped, the index’s default analyzer is used.'}, {name='slop', values=[1], description='Maximum number of positions allowed between matching tokens. Defaults to 0. Transposed terms have a slop of 2.'}", + "optional" : true, + "description" : "(Optional) MatchPhrase additional options as <>. See [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) for more information." 
+ } + ], + "variadic" : false, + "returnType" : "boolean" + } + ], + "examples" : [ + "FROM books\n| WHERE MATCH_PHRASE(author, \"William Faulkner\")" + ], + "preview" : true, + "snapshot_only" : false +} diff --git a/docs/reference/esql/functions/kibana/docs/match_phrase.md b/docs/reference/esql/functions/kibana/docs/match_phrase.md new file mode 100644 index 0000000000000..7d7c05cfb1ae0 --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/match_phrase.md @@ -0,0 +1,22 @@ + + +### MATCH_PHRASE +Use `MATCH_PHRASE` to perform a [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) on the +specified field. +Using `MATCH_PHRASE` is equivalent to using the `match_phrase` query in the Elasticsearch Query DSL. + +MatchPhrase can be used on <> fields, as well as other field types like keyword, boolean, or date types. +MatchPhrase is not supported for <> or numeric types. + +MatchPhrase can use <> to specify additional options for the +match_phrase query. +All [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) query parameters are supported. + +`MATCH_PHRASE` returns true if the provided query matches the row. + +``` +FROM books +| WHERE MATCH_PHRASE(author, "William Faulkner") +``` diff --git a/docs/reference/esql/functions/layout/match_phrase.asciidoc b/docs/reference/esql/functions/layout/match_phrase.asciidoc new file mode 100644 index 0000000000000..9567e83a1e98b --- /dev/null +++ b/docs/reference/esql/functions/layout/match_phrase.asciidoc @@ -0,0 +1,18 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-match_phrase]] +=== `MATCH_PHRASE` + +preview::["Do not use on production environments. This functionality is in technical preview and may be changed or removed in a future release. 
Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] + +*Syntax* + +[.text-center] +image::esql/functions/signature/match_phrase.svg[Embedded,opts=inline] + +include::../parameters/match_phrase.asciidoc[] +include::../description/match_phrase.asciidoc[] +include::../types/match_phrase.asciidoc[] +include::../functionNamedParams/match_phrase.asciidoc[] +include::../examples/match_phrase.asciidoc[] diff --git a/docs/reference/esql/functions/parameters/match_phrase.asciidoc b/docs/reference/esql/functions/parameters/match_phrase.asciidoc new file mode 100644 index 0000000000000..dd7d8e286579c --- /dev/null +++ b/docs/reference/esql/functions/parameters/match_phrase.asciidoc @@ -0,0 +1,12 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`field`:: +Field that the query will target. + +`query`:: +Value to find in the provided field. + +`options`:: +(Optional) MatchPhrase additional options as <>. See [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) for more information. diff --git a/docs/reference/esql/functions/signature/match_phrase.svg b/docs/reference/esql/functions/signature/match_phrase.svg new file mode 100644 index 0000000000000..52b126d35a70c --- /dev/null +++ b/docs/reference/esql/functions/signature/match_phrase.svg @@ -0,0 +1 @@ +MATCH_PHRASE(field,query,options) \ No newline at end of file diff --git a/docs/reference/esql/functions/types/match_phrase.asciidoc b/docs/reference/esql/functions/types/match_phrase.asciidoc new file mode 100644 index 0000000000000..aa728a10016bf --- /dev/null +++ b/docs/reference/esql/functions/types/match_phrase.asciidoc @@ -0,0 +1,10 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. 
+ +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +field | query | options | result +keyword | keyword | named parameters | boolean +text | keyword | named parameters | boolean +|=== diff --git a/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java index c0e5758de81b4..86f5988135d79 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MatchPhraseQueryBuilder.java @@ -129,6 +129,11 @@ public MatchPhraseQueryBuilder zeroTermsQuery(ZeroTermsQueryOption zeroTermsQuer return this; } + public MatchPhraseQueryBuilder zeroTermsQuery(String zeroTermsQueryString) { + ZeroTermsQueryOption zeroTermsQueryOption = ZeroTermsQueryOption.readFromString(zeroTermsQueryString); + return zeroTermsQuery(zeroTermsQueryOption); + } + public ZeroTermsQueryOption zeroTermsQuery() { return this.zeroTermsQuery; } diff --git a/server/src/main/java/org/elasticsearch/index/query/ZeroTermsQueryOption.java b/server/src/main/java/org/elasticsearch/index/query/ZeroTermsQueryOption.java index 2b07d40ab43a0..792347b08ecb6 100644 --- a/server/src/main/java/org/elasticsearch/index/query/ZeroTermsQueryOption.java +++ b/server/src/main/java/org/elasticsearch/index/query/ZeroTermsQueryOption.java @@ -55,6 +55,16 @@ public static ZeroTermsQueryOption readFromStream(StreamInput in) throws IOExcep throw new ElasticsearchException("unknown serialized type [" + ord + "]"); } + /** Resolves an option name to its enum constant, ignoring case; throws ElasticsearchException if no constant matches. */ + public static ZeroTermsQueryOption readFromString(String input) { + for (ZeroTermsQueryOption zeroTermsQuery : ZeroTermsQueryOption.values()) { + if (zeroTermsQuery.name().equalsIgnoreCase(input)) { + return zeroTermsQuery; + } + } + throw new ElasticsearchException("unknown zero_terms_query option [" + input + "]"); + } + @Override public void writeTo(StreamOutput out) throws IOException { 
out.writeVInt(this.ordinal); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-phrase-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-phrase-function.csv-spec new file mode 100644 index 0000000000000..5f2a9c85117c2 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-phrase-function.csv-spec @@ -0,0 +1,455 @@ +############################################### +# Tests for MatchPhrase function +# + +matchPhraseWithField +required_capability: match_phrase_function + +// tag::match-phrase-with-field[] +FROM books +| WHERE MATCH_PHRASE(author, "William Faulkner") +// end::match-phrase-with-field[] +| KEEP book_no, author +| SORT book_no +| LIMIT 5 +; + +// tag::match-phrase-with-field-result[] +book_no:keyword | author:text +2713 | William Faulkner +2883 | William Faulkner +4724 | William Faulkner +4977 | William Faulkner +5119 | William Faulkner +// end::match-phrase-with-field-result[] +; + +matchPhraseWithMultipleFunctions +required_capability: match_phrase_function + +from books +| where match_phrase(title, "Return of the King") AND match_phrase(author, "J. R. R. 
Tolkien") +| keep book_no, title; +ignoreOrder:true + +book_no:keyword | title:text +2714 | Return of the King Being the Third Part of The Lord of the Rings +; + +matchPhraseWithQueryExpressions +required_capability: match_phrase_function + +from books +| where match_phrase(title, CONCAT("Return of the", " King")) +| keep book_no, title; +ignoreOrder:true + +book_no:keyword | title:text +2714 | Return of the King Being the Third Part of The Lord of the Rings +; + +matchPhraseAfterKeep +required_capability: match_phrase_function + +from books +| keep book_no, author +| where match_phrase(author, "William Faulkner") +| sort book_no +| limit 5; + +book_no:keyword | author:text +2713 | William Faulkner +2883 | William Faulkner +4724 | William Faulkner +4977 | William Faulkner +5119 | William Faulkner +; + +matchPhraseAfterDrop +required_capability: match_phrase_function + +from books +| drop ratings, description, year, publisher, title, author.keyword +| where match_phrase(author, "William Faulkner") +| keep book_no, author +| sort book_no +| limit 5; + +book_no:keyword | author:text +2713 | William Faulkner +2883 | William Faulkner +4724 | William Faulkner +4977 | William Faulkner +5119 | William Faulkner +; + +matchPhraseAfterEval +required_capability: match_phrase_function + +from books +| eval stars = to_long(ratings / 2.0) +| where match_phrase(author, "William Faulkner") +| sort book_no +| keep book_no, author, stars +| limit 5; + +book_no:keyword | author:text | stars:long +2713 | William Faulkner | 2 +2883 | William Faulkner | 2 +4724 | William Faulkner | 2 +4977 | William Faulkner | 2 +5119 | William Faulkner | 2 +; + +matchPhraseWithConjunction +required_capability: match_phrase_function + +from books +| where match_phrase(title, "Lord of the Rings") and ratings > 4.6 +| keep book_no, title; +ignoreOrder:true + +book_no:keyword | title:text +4023 |A Tolkien Compass: Including J. R. R. 
Tolkien's Guide to the Names in The Lord of the Rings +7140 |The Lord of the Rings Poster Collection: Six Paintings by Alan Lee (No. 1) +; + +matchPhraseWithDisjunction +required_capability: match_phrase_function +required_capability: full_text_functions_disjunctions + +from books +| where match_phrase(author, "Kurt Vonnegut") or match_phrase(author, "Carole Guinane") +| keep book_no, author; +ignoreOrder:true + +book_no:keyword | author:text +2464 | Kurt Vonnegut +8956 | Kurt Vonnegut +3950 | Kurt Vonnegut +4382 | Carole Guinane +; + +matchPhraseWithDisjunctionAndFiltersConjunction +required_capability: match_phrase_function +required_capability: full_text_functions_disjunctions + +from books +| where (match_phrase(author, "Edith Vonnegut") or match_phrase(author, "Carole Guinane")) and year > 1997 +| keep book_no, author, year; +ignoreOrder:true + +book_no:keyword | author:text | year:integer +6970 | Edith Vonnegut | 1998 +4382 | Carole Guinane | 2001 +; + +matchPhraseWithDisjunctionAndConjunction +required_capability: match_phrase_function +required_capability: full_text_functions_disjunctions + +from books +| where (match_phrase(author, "Kurt Vonnegut") or match_phrase(author, "Gabriel Garcia Marquez")) and match_phrase(description, "realism") +| keep book_no; + +book_no:keyword +4814 +; + +matchPhraseWithMoreComplexDisjunctionAndConjunction +required_capability: match_phrase_function +required_capability: full_text_functions_disjunctions + +from books +| where (match_phrase(author, "Edith Vonnegut") and match_phrase(description, "charming and insightful")) or (match_phrase(author, "Gabriel Garcia Marquez") and match_phrase(description, "realism")) +| keep book_no; +ignoreOrder:true + +book_no:keyword +6970 +4814 +; + +matchPhraseWithDisjunctionIncludingConjunction +required_capability: match_phrase_function +required_capability: full_text_functions_disjunctions + +from books +| where match_phrase(author, "Kurt Vonnegut") or (match_phrase(author, "Gabriel 
Garcia Marquez") and match_phrase(description, "realism")) +| keep book_no; +ignoreOrder:true + +book_no:keyword +2464 +4814 +8956 +3950 +; + +matchPhraseWithFunctionPushedToLucene +required_capability: match_phrase_function + +from hosts +| where match_phrase(host, "beta") and cidr_match(ip1, "127.0.0.2/32", "127.0.0.3/32") +| keep card, host, ip0, ip1; +ignoreOrder:true + +card:keyword |host:keyword |ip0:ip |ip1:ip +eth1 |beta |127.0.0.1 |127.0.0.2 +; + +matchPhraseWithNonPushableConjunction +required_capability: match_phrase_function + +from books +| where match_phrase(title, "Lord of the Rings") and length(title) > 75 +| keep book_no, title; +ignoreOrder:true + +book_no:keyword | title:text +4023 | A Tolkien Compass: Including J. R. R. Tolkien's Guide to the Names in The Lord of the Rings +; + +matchPhraseWithMultipleWhereClauses +required_capability: match_phrase_function + +from books +| where match_phrase(title, "Lord of") +| where match_phrase(title, "the Rings") +| keep book_no, title; +ignoreOrder:true + +book_no:keyword | title:text +2675 | The Lord of the Rings - Boxed Set +2714 | Return of the King Being the Third Part of The Lord of the Rings +4023 | A Tolkien Compass: Including J. R. R. Tolkien's Guide to the Names in The Lord of the Rings +7140 | The Lord of the Rings Poster Collection: Six Paintings by Alan Lee (No. 
1) +; + +matchPhraseMultivaluedField +required_capability: match_phrase_function + +from employees +| where match_phrase(job_positions, "Tech Lead") and match_phrase(job_positions, "Reporting Analyst") +| keep emp_no, first_name, last_name; +ignoreOrder:true + +emp_no:integer | first_name:keyword | last_name:keyword +10004 | Chirstian | Koblick +10010 | Duangkaew | Piveteau +10011 | Mary | Sluis +10088 | Jungsoon | Syrzycki +10093 | Sailaja | Desikan +10097 | Remzi | Waschkowski +; + +testMultiValuedFieldWithConjunction +required_capability: match_phrase_function + +from employees +| where match_phrase(job_positions, "Data Scientist") and match_phrase(job_positions, "Support Engineer") +| keep emp_no, first_name, last_name; +ignoreOrder:true + +emp_no:integer | first_name:keyword | last_name:keyword +10043 | Yishay | Tzvieli +; + +testMatchPhraseAndQueryStringFunctions +required_capability: match_phrase_function +required_capability: qstr_function + +from employees +| where match_phrase(job_positions, "Data Scientist") and qstr("job_positions: (Support Engineer) and gender: F") +| keep emp_no, first_name, last_name; +ignoreOrder:true + +emp_no:integer | first_name:keyword | last_name:keyword +10041 | Uri | Lenart +10043 | Yishay | Tzvieli +; + +testMatchPhraseWithOptionsSlop +required_capability: match_phrase_function +from books +| where match_phrase(title, "Lord of Rings", {"slop": 5}) +| keep book_no; +ignoreOrder:true + +book_no:keyword +2714 +2675 +4023 +7140 +; + +testMatchPhraseWithOptionsZeroTermsNone +required_capability: match_phrase_function +from books +| where match_phrase(title, "", {"zero_terms_query": "none"}) +| keep book_no; +ignoreOrder:true + +book_no:keyword +; + +testMatchPhraseWithOptionsZeroTermsAll +required_capability: match_phrase_function +from books +| where match_phrase(title, "", {"zero_terms_query": "all"}) +| sort book_no +| keep book_no +| limit 5; + +book_no:keyword +1211 +1463 +1502 +1937 +1985 +; + + 
+testMatchPhraseWithOptionsAnalyzer +required_capability: match_phrase_function +from books +| where match_phrase(title, "Lord of the Rings", {"analyzer": "standard"}) +| keep book_no; +ignoreOrder:true + +book_no:keyword +2714 +2675 +4023 +7140 +; + + +testMatchPhraseWithMultipleOptions +required_capability: match_phrase_function +from books +| where match_phrase(title, "Lord of Rings", {"slop": 3, "analyzer": "standard", "zero_terms_query": "none"}) +| keep book_no; +ignoreOrder:true + +book_no:keyword +2714 +2675 +4023 +7140 +; + +testMatchPhraseWithOptionsBoost +required_capability: match_phrase_function +from books +| where match_phrase(title, "Lord of the Rings", {"boost": 5}) +| keep book_no; +ignoreOrder:true + +book_no:keyword +2714 +2675 +4023 +7140 +; + +testMatchPhraseInStatsNonPushable +required_capability: match_phrase_function +required_capability: full_text_functions_in_stats_where + +from books +| where length(title) > 40 +| stats c = count(*) where match_phrase(title, "Lord of the Rings") +; + +c:long +3 +; + +testMatchPhraseInStatsPushableAndNonPushable +required_capability: match_phrase_function +required_capability: full_text_functions_in_stats_where + +from books +| stats c = count(*) where (match_phrase(title, "lord of the rings") and ratings > 4.5) or (match(author, "fyodor dostoevsky") and length(title) > 50) +; + +c:long +6 +; + +testMatchPhraseInStatsPushable +required_capability: match_phrase_function +required_capability: full_text_functions_in_stats_where + +from books +| stats c = count(*) where match_phrase(author, "j. r. r. 
tolkien") +; + +c:long +9 +; + +testMatchPhraseInStatsWithOptions +required_capability: match_phrase_function +required_capability: full_text_functions_in_stats_where + +FROM books +| STATS c = count(*) where match_phrase(title, "There and Back Again", {"slop": "5"}) +; + +c:long +1 +; + +testMatchPhraseInStatsWithNonPushableDisjunctions +required_capability: match_phrase_function +required_capability: full_text_functions_in_stats_where + +FROM books +| STATS c = count(*) where match_phrase(title, "lord of the rings") or length(title) > 130 +; + +c:long +5 +; + +testMatchPhraseInStatsWithMultipleAggs +required_capability: match_phrase_function +required_capability: full_text_functions_in_stats_where +FROM books +| STATS c = count(*) where match_phrase(title, "lord of the rings"), m = max(book_no::integer) where match_phrase(author, "j. r. r. tolkien"), n = min(book_no::integer) where match_phrase(author, "fyodor dostoevsky") +; + +c:long | m:integer | n:integer +4 | 7670 | 1211 +; + + +testMatchPhraseInStatsWithGrouping +required_capability: match_phrase_function +required_capability: full_text_functions_in_stats_where +FROM books +| STATS r = AVG(ratings) where match_phrase(title, "Lord of the Rings") by author | WHERE r is not null +; +ignoreOrder: true + +r:double | author: text +4.75 | Alan Lee +4.674999952316284 | J. R. R. Tolkien +4.670000076293945 | John Ronald Reuel Tolkien +4.670000076293945 | Agnes Perkins +4.670000076293945 | Charles Adolph Huttar +4.670000076293945 | Walter Scheps +4.559999942779541 | J.R.R. 
Tolkien +; + +testMatchPhraseRequiresExactPhraseMatch +required_capability: match_phrase_function +from books +| where match_phrase(title, "Lord Rings") +| keep book_no +; + +book_no:keyword +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec index fd3870a3a1bfe..5e66fde2b1f34 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec @@ -491,3 +491,31 @@ from books metadata _score avg_score:double | max_score:double | min_score:double 3.869828939437866 | 5.123856544494629 | 3.0124807357788086 ; + +testMatchPhraseWithScore + +required_capability: match_phrase_function +required_capability: metadata_score + +from books metadata _score +| where match_phrase(title, "J. R. R. Tolkien") +| keep book_no, title, author, _score +; + +book_no:keyword | title:text | author:text | _score:double + 5335 | Letters of J R R Tolkien | J.R.R. Tolkien | 9.017186164855957 + 2130 | The J. R. R. Tolkien Audio Collection | [Christopher Tolkien, John Ronald Reuel Tolkien] | 8.412636756896973 +; + +testMatchPhraseWithScoreBoost +required_capability: match_phrase_function + +from books metadata _score +| where match_phrase(title, "J. R. R. Tolkien", {"boost": 5}) +| keep book_no, title, author, _score +; + +book_no:keyword | title:text | author:text | _score:double + 5335 | Letters of J R R Tolkien | J.R.R. Tolkien | 45.0859260559082 + 2130 | The J. R. R. 
Tolkien Audio Collection | [Christopher Tolkien, John Ronald Reuel Tolkien] | 42.06318283081055 +; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchPhraseFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchPhraseFunctionIT.java new file mode 100644 index 0000000000000..44f28e0c9ea93 --- /dev/null +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchPhraseFunctionIT.java @@ -0,0 +1,346 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.hamcrest.Matchers; +import org.junit.Before; + +import java.util.Collections; +import java.util.List; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.getValuesList; +import static org.hamcrest.CoreMatchers.containsString; + +//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE,org.elasticsearch.compute:TRACE", reason = "debug") +public class MatchPhraseFunctionIT extends AbstractEsqlIntegTestCase { + + @Before + public void setupIndex() { + createAndPopulateIndex(); + } + + public void testSimpleWhereMatchPhrase() { + var query = """ + FROM test + | WHERE match_phrase(content, "brown fox") + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + 
assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(1), List.of(6))); + } + } + + public void testSimpleWhereMatchPhraseNoResults() { + var query = """ + FROM test + | WHERE match_phrase(content, "fox brown") + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), Collections.emptyList()); + } + } + + public void testSimpleWhereMatchPhraseAndSlop() { + var query = """ + FROM test + | WHERE match_phrase(content, "fox brown", {"slop": 5}) + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(1), List.of(6))); + } + } + + public void testCombinedWhereMatchPhrase() { + var query = """ + FROM test + | WHERE match_phrase(content, "brown fox") AND id > 5 + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(6))); + } + } + + public void testMultipleMatchPhrase() { + var query = """ + FROM test + | WHERE match_phrase(content, "the quick") AND match_phrase(content, "brown fox") + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(6))); + } + } + + public void testMultipleWhereMatchPhrase() { + var query = """ + FROM test + | WHERE match_phrase(content, "the quick") AND match_phrase(content, "brown fox") + | EVAL summary = CONCAT("document with id: ", to_str(id), "and content: ", content) + | SORT summary + | LIMIT 4 + | WHERE 
match_phrase(content, "lazy dog") + | KEEP id + """; + + var error = expectThrows(ElasticsearchException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("[MatchPhrase] function cannot be used after LIMIT")); + } + + public void testNotWhereMatchPhrase() { + var query = """ + FROM test + | WHERE NOT match_phrase(content, "brown fox") + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(2), List.of(3), List.of(4), List.of(5))); + } + } + + public void testWhereMatchPhraseWithScoring() { + var query = """ + FROM test + METADATA _score + | WHERE match_phrase(content, "brown fox") + | KEEP id, _score + | SORT id ASC + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(1, 1.4274532794952393), List.of(6, 1.1248723268508911))); + } + } + + public void testWhereMatchPhraseWithScoringDifferentSort() { + + var query = """ + FROM test + METADATA _score + | WHERE match_phrase(content, "brown fox") + | KEEP id, _score + | SORT id DESC + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(6, 1.1248723268508911), List.of(1, 1.4274532794952393))); + } + } + + public void testWhereMatchPhraseWithScoringSortScore() { + var query = """ + FROM test + METADATA _score + | WHERE match_phrase(content, "brown fox") + | KEEP id, _score + | SORT _score DESC + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(1, 
1.4274532794952393), List.of(6, 1.1248723268508911))); + } + } + + public void testWhereMatchPhraseWithScoringNoSort() { + var query = """ + FROM test + METADATA _score + | WHERE match_phrase(content, "brown fox") + | KEEP id, _score + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValuesInAnyOrder(resp.values(), List.of(List.of(1, 1.4274532794952393), List.of(6, 1.1248723268508911))); + } + } + + public void testNonExistingColumn() { + var query = """ + FROM test + | WHERE match_phrase(something, "brown fox") + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("Unknown column [something]")); + } + + public void testWhereMatchPhraseEvalColumn() { + var query = """ + FROM test + | EVAL upper_content = to_upper(content) + | WHERE match_phrase(upper_content, "BROWN FOX") + | KEEP id + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat( + error.getMessage(), + containsString("[MatchPhrase] function cannot operate on [upper_content], which is not a field from an index mapping") + ); + } + + public void testWhereMatchPhraseOverWrittenColumn() { + var query = """ + FROM test + | DROP content + | EVAL content = CONCAT("document with ID ", to_str(id)) + | WHERE match_phrase(content, "document content") + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat( + error.getMessage(), + containsString("[MatchPhrase] function cannot operate on [content], which is not a field from an index mapping") + ); + } + + public void testWhereMatchPhraseAfterStats() { + var query = """ + FROM test + | STATS count(*) + | WHERE match_phrase(content, "brown fox") + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("Unknown 
column [content]")); + } + + public void testWhereMatchPhraseNotPushedDown() { + var query = """ + FROM test + | WHERE match_phrase(content, "brown fox") OR length(content) < 20 + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(1), List.of(2), List.of(6))); + } + } + + public void testWhereMatchPhraseWithRow() { + var query = """ + ROW content = "a brown fox" + | WHERE match_phrase(content, "brown fox") + """; + + var error = expectThrows(ElasticsearchException.class, () -> run(query)); + assertThat( + error.getMessage(), + containsString("line 2:22: [MatchPhrase] function cannot operate on [content], which is not a field from an index mapping") + ); + } + + public void testMatchPhraseWithStats() { + var errorQuery = """ + FROM test + | STATS c = count(*) BY match_phrase(content, "brown fox") + """; + + var error = expectThrows(ElasticsearchException.class, () -> run(errorQuery)); + assertThat(error.getMessage(), containsString("[MatchPhrase] function is only supported in WHERE and STATS commands")); + + var query = """ + FROM test + | STATS c = count(*) WHERE match_phrase(content, "brown fox"), d = count(*) WHERE match_phrase(content, "lazy dog") + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("c", "d")); + assertColumnTypes(resp.columns(), List.of("long", "long")); + assertValues(resp.values(), List.of(List.of(2L, 1L))); + } + + query = """ + FROM test METADATA _score + | WHERE match_phrase(content, "brown fox") + | STATS m = max(_score), n = min(_score) + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("m", "n")); + assertColumnTypes(resp.columns(), List.of("double", "double")); + List> valuesList = getValuesList(resp.values()); + assertEquals(1, valuesList.size()); + assertThat((double) valuesList.get(0).get(0), 
Matchers.greaterThan(1.0)); + assertThat((double) valuesList.get(0).get(1), Matchers.greaterThan(0.0)); + } + } + + public void testMatchPhraseWithinEval() { + var query = """ + FROM test + | EVAL matches_query = match_phrase(content, "brown fox") + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("[MatchPhrase] function is only supported in WHERE and STATS commands")); + } + + private void createAndPopulateIndex() { + var indexName = "test"; + var client = client().admin().indices(); + var CreateRequest = client.prepareCreate(indexName) + .setSettings(Settings.builder().put("index.number_of_shards", 1)) + .setMapping("id", "type=integer", "content", "type=text"); + assertAcked(CreateRequest); + client().prepareBulk() + .add(new IndexRequest(indexName).id("1").source("id", 1, "content", "This is a brown fox")) + .add(new IndexRequest(indexName).id("2").source("id", 2, "content", "This is a brown dog")) + .add(new IndexRequest(indexName).id("3").source("id", 3, "content", "This dog is really brown")) + .add(new IndexRequest(indexName).id("4").source("id", 4, "content", "The dog is brown but this document is very very long")) + .add(new IndexRequest(indexName).id("5").source("id", 5, "content", "There is also a white cat")) + .add(new IndexRequest(indexName).id("6").source("id", 6, "content", "The quick brown fox jumps over the lazy dog")) + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + .get(); + ensureYellow(indexName); + } +} diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ScoringIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ScoringIT.java index 4a76c7de81200..3988eba279354 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ScoringIT.java +++ 
b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ScoringIT.java @@ -49,6 +49,7 @@ public static List params() { params.add(new Object[] { "content:\"fox\"" }); params.add(new Object[] { "qstr(\"content: fox\")" }); params.add(new Object[] { "kql(\"content*: fox\")" }); + params.add(new Object[] { "match_phrase(content, \"fox\")" }); if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { params.add(new Object[] { "term(content, \"fox\")" }); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index cfd738da86512..0595fbc69494d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -935,7 +935,12 @@ public enum Cap { /** * Support knn function */ - KNN_FUNCTION(Build.current().isSnapshot()); + KNN_FUNCTION(Build.current().isSnapshot()), + + /** + * MATCH PHRASE function + */ + MATCH_PHRASE_FUNCTION; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 69752b9985263..fd00e84077189 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -34,6 +34,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.WeightedAvg; import org.elasticsearch.xpack.esql.expression.function.fulltext.Kql; import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; +import org.elasticsearch.xpack.esql.expression.function.fulltext.MatchPhrase; import 
org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString; import org.elasticsearch.xpack.esql.expression.function.fulltext.Term; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; @@ -444,7 +445,8 @@ private static FunctionDefinition[][] functions() { new FunctionDefinition[] { def(Kql.class, uni(Kql::new), "kql"), def(Match.class, tri(Match::new), "match"), - def(QueryString.class, bi(QueryString::new), "qstr") } }; + def(QueryString.class, bi(QueryString::new), "qstr"), + def(MatchPhrase.class, tri(MatchPhrase::new), "match_phrase") } }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index 7e2e3d459d477..dc509cbd87caf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -60,6 +60,8 @@ import static org.elasticsearch.xpack.esql.common.Failure.fail; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isMapExpression; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; @@ -369,6 +371,31 @@ protected static void populateOptionsMap( } } + protected TypeResolution resolveOptions(Expression options, TypeResolutions.ParamOrdinal paramOrdinal) { + if (options != null) { + TypeResolution resolution = isNotNull(options, sourceText(), 
paramOrdinal); + if (resolution.unresolved()) { + return resolution; + } + // MapExpression does not have a DataType associated with it + resolution = isMapExpression(options, sourceText(), paramOrdinal); + if (resolution.unresolved()) { + return resolution; + } + + try { + resolvedOptions(); + } catch (InvalidArgumentException e) { + return new TypeResolution(e.getMessage()); + } + } + return TypeResolution.TYPE_RESOLVED; + } + + protected Map resolvedOptions() throws InvalidArgumentException { + return Map.of(); + } + public static String getNameFromFieldAttribute(FieldAttribute fieldAttribute) { String fieldName = fieldAttribute.name(); if (fieldAttribute.field() instanceof MultiTypeEsField multiTypeEsField) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java index 5c0a3857d7783..20616903279c4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextWritables.java @@ -24,6 +24,7 @@ public static List getNamedWriteables() { entries.add(QueryString.ENTRY); entries.add(Match.ENTRY); entries.add(Kql.ENTRY); + entries.add(MatchPhrase.ENTRY); if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { entries.add(Term.ENTRY); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java index 0082d34c6ce3c..e6a6a730e9734 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java @@ -18,18 +18,14 @@ 
import org.elasticsearch.xpack.esql.common.Failure; import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.InvalidArgumentException; -import org.elasticsearch.xpack.esql.core.expression.EntryExpression; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.FoldContext; -import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.MapExpression; import org.elasticsearch.xpack.esql.core.querydsl.query.Query; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.DataTypeConverter; -import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; import org.elasticsearch.xpack.esql.core.util.Check; import org.elasticsearch.xpack.esql.core.util.NumericUtils; import org.elasticsearch.xpack.esql.expression.function.Example; @@ -37,7 +33,6 @@ import org.elasticsearch.xpack.esql.expression.function.MapParam; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; import org.elasticsearch.xpack.esql.expression.function.Param; -import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.planner.TranslatorHandler; @@ -53,7 +48,6 @@ import java.util.function.BiConsumer; import static java.util.Map.entry; -import static org.elasticsearch.common.logging.LoggerMessageFormat.format; import static org.elasticsearch.index.query.AbstractQueryBuilder.BOOST_FIELD; import static org.elasticsearch.index.query.MatchQueryBuilder.ANALYZER_FIELD; import static 
org.elasticsearch.index.query.MatchQueryBuilder.FUZZY_REWRITE_FIELD; @@ -68,8 +62,6 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isMapExpression; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; @@ -302,7 +294,7 @@ public final void writeTo(StreamOutput out) throws IOException { @Override protected TypeResolution resolveParams() { - return resolveField().and(resolveQuery()).and(resolveOptions()).and(checkParamCompatibility()); + return resolveField().and(resolveQuery()).and(resolveOptions(options(), THIRD)).and(checkParamCompatibility()); } private TypeResolution resolveField() { @@ -346,25 +338,9 @@ private TypeResolution checkParamCompatibility() { return new TypeResolution(formatIncompatibleTypesMessage(fieldType, queryType, sourceText())); } - private TypeResolution resolveOptions() { - if (options() != null) { - TypeResolution resolution = isNotNull(options(), sourceText(), THIRD); - if (resolution.unresolved()) { - return resolution; - } - // MapExpression does not have a DataType associated with it - resolution = isMapExpression(options(), sourceText(), THIRD); - if (resolution.unresolved()) { - return resolution; - } - - try { - matchQueryOptions(); - } catch (InvalidArgumentException e) { - return new TypeResolution(e.getMessage()); - } - } - return TypeResolution.TYPE_RESOLVED; + @Override + protected Map resolvedOptions() { + return 
matchQueryOptions(); } private Map matchQueryOptions() throws InvalidArgumentException { @@ -377,33 +353,7 @@ private Map matchQueryOptions() throws InvalidArgumentException // Match is lenient by default to avoid failing on incompatible types matchOptions.put(LENIENT_FIELD.getPreferredName(), true); - for (EntryExpression entry : ((MapExpression) options()).entryExpressions()) { - Expression optionExpr = entry.key(); - Expression valueExpr = entry.value(); - TypeResolution resolution = isFoldable(optionExpr, sourceText(), SECOND).and(isFoldable(valueExpr, sourceText(), SECOND)); - if (resolution.unresolved()) { - throw new InvalidArgumentException(resolution.message()); - } - Object optionExprLiteral = ((Literal) optionExpr).value(); - Object valueExprLiteral = ((Literal) valueExpr).value(); - String optionName = optionExprLiteral instanceof BytesRef br ? br.utf8ToString() : optionExprLiteral.toString(); - String optionValue = valueExprLiteral instanceof BytesRef br ? br.utf8ToString() : valueExprLiteral.toString(); - // validate the optionExpr is supported - DataType dataType = ALLOWED_OPTIONS.get(optionName); - if (dataType == null) { - throw new InvalidArgumentException( - format(null, "Invalid option [{}] in [{}], expected one of {}", optionName, sourceText(), ALLOWED_OPTIONS.keySet()) - ); - } - try { - matchOptions.put(optionName, DataTypeConverter.convert(optionValue, dataType)); - } catch (InvalidArgumentException e) { - throw new InvalidArgumentException( - format(null, "Invalid option [{}] in [{}], {}", optionName, sourceText(), e.getMessage()) - ); - } - } - + populateOptionsMap((MapExpression) options(), matchOptions, SECOND, sourceText(), ALLOWED_OPTIONS); return matchOptions; } @@ -486,22 +436,13 @@ public Object queryAsObject() { protected Query translate(TranslatorHandler handler) { var fieldAttribute = fieldAsFieldAttribute(); Check.notNull(fieldAttribute, "Match must have a field attribute as the first argument"); - String fieldName = 
fieldAttribute.name(); - if (fieldAttribute.field() instanceof MultiTypeEsField multiTypeEsField) { - // If we have multiple field types, we allow the query to be done, but getting the underlying field name - fieldName = multiTypeEsField.getName(); - } + String fieldName = getNameFromFieldAttribute(fieldAttribute); // Make query lenient so mixed field types can be queried when a field type is incompatible with the value provided return new MatchQuery(source(), fieldName, queryAsObject(), matchQueryOptions()); } private FieldAttribute fieldAsFieldAttribute() { - Expression fieldExpression = field; - // Field may be converted to other data type (field_name :: data_type), so we need to check the original field - if (fieldExpression instanceof AbstractConvertFunction convertFunction) { - fieldExpression = convertFunction.field(); - } - return fieldExpression instanceof FieldAttribute fieldAttribute ? fieldAttribute : null; + return fieldAsFieldAttribute(field); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhrase.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhrase.java new file mode 100644 index 0000000000000..49d5d8ed2929f --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhrase.java @@ -0,0 +1,319 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.fulltext; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; +import org.elasticsearch.xpack.esql.common.Failure; +import org.elasticsearch.xpack.esql.common.Failures; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.FoldContext; +import org.elasticsearch.xpack.esql.core.expression.MapExpression; +import org.elasticsearch.xpack.esql.core.querydsl.query.Query; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.util.Check; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.MapParam; +import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.planner.TranslatorHandler; +import org.elasticsearch.xpack.esql.querydsl.query.MatchPhraseQuery; +import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import 
java.util.function.BiConsumer; + +import static java.util.Map.entry; +import static org.elasticsearch.index.query.AbstractQueryBuilder.BOOST_FIELD; +import static org.elasticsearch.index.query.MatchPhraseQueryBuilder.SLOP_FIELD; +import static org.elasticsearch.index.query.MatchPhraseQueryBuilder.ZERO_TERMS_QUERY_FIELD; +import static org.elasticsearch.index.query.MatchQueryBuilder.ANALYZER_FIELD; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS; +import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.IP; +import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; +import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; + +/** + * Full text function that performs a {@link org.elasticsearch.xpack.esql.querydsl.query.MatchPhraseQuery}.
+ */ +public class MatchPhrase extends FullTextFunction implements OptionalArgument, PostAnalysisPlanVerificationAware { + + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + "MatchPhrase", + MatchPhrase::readFrom + ); + public static final Set FIELD_DATA_TYPES = Set.of(KEYWORD, TEXT); + public static final Set QUERY_DATA_TYPES = Set.of(KEYWORD, TEXT); + + protected final Expression field; + + // Options for match_phrase function. They don’t need to be serialized as the data nodes will retrieve them from the query builder + private final transient Expression options; + + public static final Map ALLOWED_OPTIONS = Map.ofEntries( + entry(ANALYZER_FIELD.getPreferredName(), KEYWORD), + entry(BOOST_FIELD.getPreferredName(), FLOAT), + entry(SLOP_FIELD.getPreferredName(), INTEGER), + entry(ZERO_TERMS_QUERY_FIELD.getPreferredName(), KEYWORD) + ); + + @FunctionInfo( + returnType = "boolean", + preview = true, + description = """ + Use `MATCH_PHRASE` to perform a [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) on the + specified field. + Using `MATCH_PHRASE` is equivalent to using the `match_phrase` query in the Elasticsearch Query DSL. + + MatchPhrase can be used on <> fields, as well as other field types like keyword, boolean, or date types. + MatchPhrase is not supported for <> or numeric types. + + MatchPhrase can use <> to specify additional options for the + match_phrase query. + All [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) query parameters are supported. 
+ + `MATCH_PHRASE` returns true if the provided query matches the row.""", + examples = { @Example(file = "match-phrase-function", tag = "match-phrase-with-field") } + ) + public MatchPhrase( + Source source, + @Param(name = "field", type = { "keyword", "text" }, description = "Field that the query will target.") Expression field, + @Param(name = "query", type = { "keyword" }, description = "Value to find in the provided field.") Expression matchPhraseQuery, + @MapParam( + name = "options", + params = { + @MapParam.MapParamEntry( + name = "analyzer", + type = "keyword", + valueHint = { "standard" }, + description = "Analyzer used to convert the text in the query value into token. Defaults to the index-time analyzer" + + " mapped for the field. If no analyzer is mapped, the index’s default analyzer is used." + ), + @MapParam.MapParamEntry( + name = "slop", + type = "integer", + valueHint = { "1" }, + description = "Maximum number of positions allowed between matching tokens. Defaults to 0." + + " Transposed terms have a slop of 2." + ), + @MapParam.MapParamEntry( + name = "zero_terms_query", + type = "keyword", + valueHint = { "none", "all" }, + description = "Indicates whether all documents or none are returned if the analyzer removes all tokens, such as " + + "when using a stop filter. Defaults to none." + ), + @MapParam.MapParamEntry( + name = "boost", + type = "float", + valueHint = { "2.5" }, + description = "Floating point number used to decrease or increase the relevance scores of the query. Defaults to 1.0." + ) }, + description = "(Optional) MatchPhrase additional options as <>." 
+ + " See [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) for more information.", + optional = true + ) Expression options + ) { + this(source, field, matchPhraseQuery, options, null); + } + + public MatchPhrase(Source source, Expression field, Expression matchPhraseQuery, Expression options, QueryBuilder queryBuilder) { + super( + source, + matchPhraseQuery, + options == null ? List.of(field, matchPhraseQuery) : List.of(field, matchPhraseQuery, options), + queryBuilder + ); + this.field = field; + this.options = options; + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + public String functionName() { + return ENTRY.name; + } + + private static MatchPhrase readFrom(StreamInput in) throws IOException { + Source source = Source.readFrom((PlanStreamInput) in); + Expression field = in.readNamedWriteable(Expression.class); + Expression query = in.readNamedWriteable(Expression.class); + QueryBuilder queryBuilder = in.readOptionalNamedWriteable(QueryBuilder.class); + return new MatchPhrase(source, field, query, null, queryBuilder); + } + + @Override + public final void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(field()); + out.writeNamedWriteable(query()); + out.writeOptionalNamedWriteable(queryBuilder()); + } + + @Override + protected TypeResolution resolveParams() { + return resolveField().and(resolveQuery()).and(resolveOptions(options(), THIRD)); + } + + private TypeResolution resolveField() { + return isNotNull(field, sourceText(), FIRST).and(isType(field, FIELD_DATA_TYPES::contains, sourceText(), FIRST, "keyword, text")); + } + + private TypeResolution resolveQuery() { + return isType(query(), QUERY_DATA_TYPES::contains, sourceText(), SECOND, "keyword").and( + isNotNullAndFoldable(query(), sourceText(), SECOND) + ); + } + + @Override + protected Map resolvedOptions() throws InvalidArgumentException { + return 
matchPhraseQueryOptions(); + } + + private Map matchPhraseQueryOptions() throws InvalidArgumentException { + if (options() == null) { + return Map.of(); + } + + Map matchPhraseOptions = new HashMap<>(); + populateOptionsMap((MapExpression) options(), matchPhraseOptions, SECOND, sourceText(), ALLOWED_OPTIONS); + return matchPhraseOptions; + } + + public Expression field() { + return field; + } + + public Expression options() { + return options; + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, MatchPhrase::new, field(), query(), options(), queryBuilder()); + } + + @Override + public Expression replaceChildren(List newChildren) { + return new MatchPhrase( + source(), + newChildren.get(0), + newChildren.get(1), + newChildren.size() > 2 ? newChildren.get(2) : null, + queryBuilder() + ); + } + + @Override + public Expression replaceQueryBuilder(QueryBuilder queryBuilder) { + return new MatchPhrase(source(), field, query(), options(), queryBuilder); + } + + @Override + public BiConsumer postAnalysisPlanVerification() { + return (plan, failures) -> { + super.postAnalysisPlanVerification().accept(plan, failures); + plan.forEachExpression(MatchPhrase.class, mp -> { + if (mp.fieldAsFieldAttribute() == null) { + failures.add( + Failure.fail( + mp.field(), + "[{}] {} cannot operate on [{}], which is not a field from an index mapping", + functionName(), + functionType(), + mp.field().sourceText() + ) + ); + } + }); + }; + } + + @Override + public Object queryAsObject() { + Object queryAsObject = query().fold(FoldContext.small() /* TODO remove me */); + + // Convert BytesRef to string for string-based values + if (queryAsObject instanceof BytesRef bytesRef) { + return switch (query().dataType()) { + case IP -> EsqlDataTypeConverter.ipToString(bytesRef); + case VERSION -> EsqlDataTypeConverter.versionToString(bytesRef); + default -> bytesRef.utf8ToString(); + }; + } + + // Converts specific types to the correct type for the query + if 
(query().dataType() == DataType.DATETIME && queryAsObject instanceof Long) { + // When casting to date and datetime, we get a long back. But MatchPhrase query needs a date string + return EsqlDataTypeConverter.dateTimeToString((Long) queryAsObject); + } else if (query().dataType() == DATE_NANOS && queryAsObject instanceof Long) { + return EsqlDataTypeConverter.nanoTimeToString((Long) queryAsObject); + } + + return queryAsObject; + } + + @Override + protected Query translate(TranslatorHandler handler) { + var fieldAttribute = fieldAsFieldAttribute(); + Check.notNull(fieldAttribute, "MatchPhrase must have a field attribute as the first argument"); + String fieldName = getNameFromFieldAttribute(fieldAttribute); + return new MatchPhraseQuery(source(), fieldName, queryAsObject(), matchPhraseQueryOptions()); + } + + private FieldAttribute fieldAsFieldAttribute() { + return fieldAsFieldAttribute(field); + } + + @Override + public boolean equals(Object o) { + // MatchPhrase does not serialize options, as they get included in the query builder. 
We need to override equals and hashcode to + // ignore options when comparing two MatchPhrase functions + if (o == null || getClass() != o.getClass()) return false; + MatchPhrase matchPhrase = (MatchPhrase) o; + return Objects.equals(field(), matchPhrase.field()) + && Objects.equals(query(), matchPhrase.query()) + && Objects.equals(queryBuilder(), matchPhrase.queryBuilder()); + } + + @Override + public int hashCode() { + return Objects.hash(field(), query(), queryBuilder()); + } + +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryString.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryString.java index e72e95787cc3c..ec87103163661 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryString.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryString.java @@ -64,8 +64,6 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isMapExpression; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; @@ -353,30 +351,14 @@ private Map queryStringOptions() throws InvalidArgumentException return matchOptions; } - private TypeResolution resolveOptions() { - if (options() != null) { - TypeResolution resolution = isNotNull(options(), sourceText(), SECOND); - if 
(resolution.unresolved()) { - return resolution; - } - // MapExpression does not have a DataType associated with it - resolution = isMapExpression(options(), sourceText(), SECOND); - if (resolution.unresolved()) { - return resolution; - } - - try { - queryStringOptions(); - } catch (InvalidArgumentException e) { - return new TypeResolution(e.getMessage()); - } - } - return TypeResolution.TYPE_RESOLVED; + @Override + protected Map resolvedOptions() { + return queryStringOptions(); } @Override protected TypeResolution resolveParams() { - return resolveQuery().and(resolveOptions()); + return resolveQuery().and(resolveOptions(options(), SECOND)); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/MatchPhraseQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/MatchPhraseQuery.java new file mode 100644 index 0000000000000..be6f244ac4acf --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/MatchPhraseQuery.java @@ -0,0 +1,111 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ +package org.elasticsearch.xpack.esql.querydsl.query; + +import org.elasticsearch.index.query.AbstractQueryBuilder; +import org.elasticsearch.index.query.MatchPhraseQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.xpack.esql.core.querydsl.query.Query; +import org.elasticsearch.xpack.esql.core.tree.Source; + +import java.util.Map; +import java.util.Objects; +import java.util.function.BiConsumer; + +import static java.util.Map.entry; +import static org.elasticsearch.index.query.MatchPhraseQueryBuilder.SLOP_FIELD; +import static org.elasticsearch.index.query.MatchPhraseQueryBuilder.ZERO_TERMS_QUERY_FIELD; +import static org.elasticsearch.index.query.MatchQueryBuilder.ANALYZER_FIELD; + +public class MatchPhraseQuery extends Query { + + private static final Map> BUILDER_APPLIERS; + + static { + BUILDER_APPLIERS = Map.ofEntries( + entry(ANALYZER_FIELD.getPreferredName(), (qb, s) -> qb.analyzer(s.toString())), + entry(SLOP_FIELD.getPreferredName(), (qb, s) -> qb.slop(Integer.parseInt(s.toString()))), + entry(ZERO_TERMS_QUERY_FIELD.getPreferredName(), (qb, s) -> qb.zeroTermsQuery((String) s)), + entry(AbstractQueryBuilder.BOOST_FIELD.getPreferredName(), (qb, s) -> qb.boost((Float) s)) + ); + } + + private final String name; + private final Object text; + private final Double boost; + private final Map options; + + public MatchPhraseQuery(Source source, String name, Object text) { + this(source, name, text, Map.of()); + } + + public MatchPhraseQuery(Source source, String name, Object text, Map options) { + super(source); + assert options != null; + this.name = name; + this.text = text; + this.options = options; + this.boost = null; + } + + @Override + protected QueryBuilder asBuilder() { + final MatchPhraseQueryBuilder queryBuilder = QueryBuilders.matchPhraseQuery(name, text); + options.forEach((k, v) -> { + if (BUILDER_APPLIERS.containsKey(k)) { + 
BUILDER_APPLIERS.get(k).accept(queryBuilder, v); + } else { + throw new IllegalArgumentException("illegal match_phrase option [" + k + "]"); + } + }); + if (boost != null) { + queryBuilder.boost(boost.floatValue()); + } + return queryBuilder; + } + + public String name() { + return name; + } + + public Object text() { + return text; + } + + @Override + public int hashCode() { + return Objects.hash(text, name, options, boost); + } + + @Override + public boolean equals(Object obj) { + if (false == super.equals(obj)) { + return false; + } + + MatchPhraseQuery other = (MatchPhraseQuery) obj; + return Objects.equals(text, other.text) + && Objects.equals(name, other.name) + && Objects.equals(options, other.options) + && Objects.equals(boost, other.boost); + } + + @Override + protected String innerToString() { + return name + ":" + text; + } + + public Map options() { + return options; + } + + @Override + public boolean scorable() { + return true; + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index d44dcfdf8f72b..a86c9de4701ea 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -269,6 +269,10 @@ public final void test() throws Throwable { "can't use MATCH function in csv tests", testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.MATCH_FUNCTION.capabilityName()) ); + assumeFalse( + "can't use MATCH_PHRASE function in csv tests", + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.MATCH_PHRASE_FUNCTION.capabilityName()) + ); assumeFalse( "can't use KQL function in csv tests", testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.KQL_FUNCTION.capabilityName()) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 0677eb01b5231..5fddc95a41d2e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -19,6 +19,7 @@ import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; +import org.elasticsearch.xpack.esql.expression.function.fulltext.MatchPhrase; import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString; import org.elasticsearch.xpack.esql.expression.function.vector.Knn; import org.elasticsearch.xpack.esql.index.EsIndex; @@ -1226,6 +1227,9 @@ public void testFieldBasedFullTextFunctions() throws Exception { checkFieldBasedWithNonIndexedColumn(":", "text : \"cat\"", "operator"); checkFieldBasedFunctionNotAllowedAfterCommands(":", "operator", "title : \"Meditation\""); + checkFieldBasedWithNonIndexedColumn("MatchPhrase", "match_phrase(text, \"cat\")", "function"); + checkFieldBasedFunctionNotAllowedAfterCommands("MatchPhrase", "function", "match_phrase(title, \"Meditation\")"); + if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkFieldBasedWithNonIndexedColumn("Term", "term(text, \"cat\")", "function"); checkFieldBasedFunctionNotAllowedAfterCommands("Term", "function", "term(title, \"Meditation\")"); @@ -1356,6 +1360,7 @@ public void testFullTextFunctionsOnlyAllowedInWhere() throws Exception { checkFullTextFunctionsOnlyAllowedInWhere(":", "title:\"Meditation\"", "operator"); checkFullTextFunctionsOnlyAllowedInWhere("QSTR", "qstr(\"Meditation\")", "function"); checkFullTextFunctionsOnlyAllowedInWhere("KQL", "kql(\"Meditation\")", "function"); + checkFullTextFunctionsOnlyAllowedInWhere("MatchPhrase", "match_phrase(title, \"Meditation\")", "function"); if 
(EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkFullTextFunctionsOnlyAllowedInWhere("Term", "term(title, \"Meditation\")", "function"); } @@ -1391,6 +1396,7 @@ public void testFullTextFunctionsDisjunctions() { checkWithFullTextFunctionsDisjunctions("title : \"Meditation\""); checkWithFullTextFunctionsDisjunctions("qstr(\"title: Meditation\")"); checkWithFullTextFunctionsDisjunctions("kql(\"title: Meditation\")"); + checkWithFullTextFunctionsDisjunctions("match_phrase(title, \"Meditation\")"); if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkWithFullTextFunctionsDisjunctions("term(title, \"Meditation\")"); } @@ -1452,6 +1458,7 @@ public void testFullTextFunctionsWithNonBooleanFunctions() { checkFullTextFunctionsWithNonBooleanFunctions(":", "title:\"Meditation\"", "operator"); checkFullTextFunctionsWithNonBooleanFunctions("QSTR", "qstr(\"title: Meditation\")", "function"); checkFullTextFunctionsWithNonBooleanFunctions("KQL", "kql(\"title: Meditation\")", "function"); + checkFullTextFunctionsWithNonBooleanFunctions("MatchPhrase", "match_phrase(title, \"Meditation\")", "function"); if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkFullTextFunctionsWithNonBooleanFunctions("Term", "term(title, \"Meditation\")", "function"); } @@ -1519,6 +1526,7 @@ private void checkFullTextFunctionsWithNonBooleanFunctions(String functionName, public void testFullTextFunctionsTargetsExistingField() throws Exception { testFullTextFunctionTargetsExistingField("match(title, \"Meditation\")"); testFullTextFunctionTargetsExistingField("title : \"Meditation\""); + testFullTextFunctionTargetsExistingField("match_phrase(title, \"Meditation\")"); if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { testFullTextFunctionTargetsExistingField("term(fist_name, \"Meditation\")"); } @@ -2043,6 +2051,7 @@ public void testLookupJoinDataTypeMismatch() { public void testFullTextFunctionOptions() { checkOptionDataTypes(Match.ALLOWED_OPTIONS, "FROM test | WHERE match(title, 
\"Jean\", {\"%s\": %s})"); checkOptionDataTypes(QueryString.ALLOWED_OPTIONS, "FROM test | WHERE QSTR(\"title: Jean\", {\"%s\": %s})"); + checkOptionDataTypes(MatchPhrase.ALLOWED_OPTIONS, "FROM test | WHERE MATCH_PHRASE(title, \"Jean\", {\"%s\": %s})"); if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM test | WHERE KNN(vector, [0.1, 0.2, 0.3], {\"%s\": %s})"); } @@ -2100,6 +2109,7 @@ private static String exampleValueForType(DataType currentType) { public void testFullTextFunctionCurrentlyUnsupportedBehaviour() throws Exception { testFullTextFunctionsCurrentlyUnsupportedBehaviour("match(title, \"Meditation\")"); testFullTextFunctionsCurrentlyUnsupportedBehaviour("title : \"Meditation\""); + testFullTextFunctionsCurrentlyUnsupportedBehaviour("match_phrase(title, \"Meditation\")"); if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { testFullTextFunctionsCurrentlyUnsupportedBehaviour("term(title, \"Meditation\")"); } @@ -2117,6 +2127,8 @@ public void testFullTextFunctionsNullArgs() throws Exception { checkFullTextFunctionNullArgs("match(title, null)", "second"); checkFullTextFunctionNullArgs("qstr(null)", ""); checkFullTextFunctionNullArgs("kql(null)", ""); + checkFullTextFunctionNullArgs("match_phrase(null, \"query\")", "first"); + checkFullTextFunctionNullArgs("match_phrase(title, null)", "second"); if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkFullTextFunctionNullArgs("term(null, \"query\")", "first"); checkFullTextFunctionNullArgs("term(title, null)", "second"); @@ -2138,6 +2150,7 @@ public void testFullTextFunctionsConstantQuery() throws Exception { checkFullTextFunctionsConstantQuery("match(title, category)", "second"); checkFullTextFunctionsConstantQuery("qstr(title)", ""); checkFullTextFunctionsConstantQuery("kql(title)", ""); + checkFullTextFunctionsConstantQuery("match_phrase(title, tags)", "second"); if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { 
checkFullTextFunctionsConstantQuery("term(title, tags)", "second"); } @@ -2158,6 +2171,7 @@ public void testFullTextFunctionsInStats() { checkFullTextFunctionsInStats("title : \"Meditation\""); checkFullTextFunctionsInStats("qstr(\"title: Meditation\")"); checkFullTextFunctionsInStats("kql(\"title: Meditation\")"); + checkFullTextFunctionsInStats("match_phrase(title, \"Meditation\")"); if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { checkFullTextFunctionsInStats("knn(vector, [0, 1, 2])"); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhraseTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhraseTests.java new file mode 100644 index 0000000000000..ce996962398bf --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhraseTests.java @@ -0,0 +1,129 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.fulltext; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.MapExpression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.FunctionName; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; + +import static org.elasticsearch.xpack.esql.SerializationTestUtils.serializeDeserialize; +import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; +import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; +import static org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier.stringCases; +import static org.elasticsearch.xpack.esql.planner.TranslatorHandler.TRANSLATOR_HANDLER; +import static org.hamcrest.Matchers.equalTo; + +@FunctionName("match_phrase") +public class MatchPhraseTests extends AbstractFunctionTestCase { + + public MatchPhraseTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static 
Iterable parameters() { + return parameterSuppliersFromTypedData(addFunctionNamedParams(testCaseSuppliers())); + } + + private static List testCaseSuppliers() { + List suppliers = new ArrayList<>(); + addStringTestCases(suppliers); + return suppliers; + } + + public static void addStringTestCases(List suppliers) { + for (DataType fieldType : DataType.stringTypes()) { + if (DataType.UNDER_CONSTRUCTION.containsKey(fieldType)) { + continue; + } + for (TestCaseSupplier.TypedDataSupplier queryDataSupplier : stringCases(fieldType)) { + suppliers.add( + TestCaseSupplier.testCaseSupplier( + queryDataSupplier, + new TestCaseSupplier.TypedDataSupplier(fieldType.typeName(), () -> randomAlphaOfLength(10), DataType.KEYWORD), + (d1, d2) -> equalTo("string"), + DataType.BOOLEAN, + (o1, o2) -> true + ) + ); + } + } + } + + /** + * Adds function named parameters to all the test case suppliers provided + */ + private static List addFunctionNamedParams(List suppliers) { + List result = new ArrayList<>(); + for (TestCaseSupplier supplier : suppliers) { + List dataTypes = new ArrayList<>(supplier.types()); + dataTypes.add(UNSUPPORTED); + result.add(new TestCaseSupplier(supplier.name() + ", options", dataTypes, () -> { + List values = new ArrayList<>(supplier.get().getData()); + values.add( + new TestCaseSupplier.TypedData( + new MapExpression( + Source.EMPTY, + List.of(new Literal(Source.EMPTY, "slop", INTEGER), new Literal(Source.EMPTY, randomAlphaOfLength(10), KEYWORD)) + ), + UNSUPPORTED, + "options" + ).forceLiteral() + ); + + return new TestCaseSupplier.TestCase(values, equalTo("MatchPhraseEvaluator"), BOOLEAN, equalTo(true)); + })); + } + return result; + } + + @Override + protected Expression build(Source source, List args) { + MatchPhrase matchPhrase = new MatchPhrase(source, args.get(0), args.get(1), args.size() > 2 ? 
args.get(2) : null); + // We need to add the QueryBuilder to the match_phrase expression, as it is used to implement equals() and hashCode() and + // thus test the serialization methods. But we can only do this if the parameters make sense . + if (args.get(0) instanceof FieldAttribute && args.get(1).foldable()) { + QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(LucenePushdownPredicates.DEFAULT, matchPhrase).toQueryBuilder(); + matchPhrase.replaceQueryBuilder(queryBuilder); + } + return matchPhrase; + } + + /** + * Copy of the overridden method that doesn't check for children size, as the {@code options} child isn't serialized in MatchPhrase. + */ + @Override + protected Expression serializeDeserializeExpression(Expression expression) { + Expression newExpression = serializeDeserialize( + expression, + PlanStreamOutput::writeNamedWriteable, + in -> in.readNamedWriteable(Expression.class), + testCase.getConfiguration() // The configuration query should be == to the source text of the function for this to work + ); + // Fields use synthetic sources, which can't be serialized. So we use the originals instead. + return newExpression.replaceChildren(expression.children()); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/MatchPhraseQueryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/MatchPhraseQueryTests.java new file mode 100644 index 0000000000000..6b81cda4ebb28 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/MatchPhraseQueryTests.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ +package org.elasticsearch.xpack.esql.querydsl.query; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.index.query.MatchPhraseQueryBuilder; +import org.elasticsearch.index.query.ZeroTermsQueryOption; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.tree.SourceTests; +import org.elasticsearch.xpack.esql.core.util.StringUtils; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.function.Function; + +import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode; +import static org.hamcrest.Matchers.equalTo; + +public class MatchPhraseQueryTests extends ESTestCase { + static MatchPhraseQuery randomMatchPhraseQuery() { + return new MatchPhraseQuery(SourceTests.randomSource(), randomAlphaOfLength(5), randomAlphaOfLength(5)); + } + + public void testEqualsAndHashCode() { + checkEqualsAndHashCode(randomMatchPhraseQuery(), MatchPhraseQueryTests::copy, MatchPhraseQueryTests::mutate); + } + + private static MatchPhraseQuery copy(MatchPhraseQuery query) { + return new MatchPhraseQuery(query.source(), query.name(), query.text(), query.options()); + } + + private static MatchPhraseQuery mutate(MatchPhraseQuery query) { + List> options = Arrays.asList( + q -> new MatchPhraseQuery(SourceTests.mutate(q.source()), q.name(), q.text(), q.options()), + q -> new MatchPhraseQuery(q.source(), randomValueOtherThan(q.name(), () -> randomAlphaOfLength(5)), q.text(), q.options()), + q -> new MatchPhraseQuery(q.source(), q.name(), randomValueOtherThan(q.text(), () -> randomAlphaOfLength(5)), q.options()) + ); + return randomFrom(options).apply(query); + } + + public void testQueryBuilding() { + + MatchPhraseQueryBuilder qb = getBuilder(Map.of("slop", 2, "zero_terms_query", "none")); + assertThat(qb.slop(), equalTo(2)); + assertThat(qb.zeroTermsQuery(), equalTo(ZeroTermsQueryOption.NONE)); + + Exception e = 
expectThrows(IllegalArgumentException.class, () -> getBuilder(Map.of("pizza", "yummy"))); + assertThat(e.getMessage(), equalTo("illegal match_phrase option [pizza]")); + + e = expectThrows(NumberFormatException.class, () -> getBuilder(Map.of("slop", "mushrooms"))); + assertThat(e.getMessage(), equalTo("For input string: \"mushrooms\"")); + + e = expectThrows(ElasticsearchException.class, () -> getBuilder(Map.of("zero_terms_query", "pepperoni"))); + assertThat(e.getMessage(), equalTo("unknown serialized type [pepperoni]")); + } + + private static MatchPhraseQueryBuilder getBuilder(Map options) { + final Source source = new Source(1, 1, StringUtils.EMPTY); + final MatchPhraseQuery mpq = new MatchPhraseQuery(source, "eggplant", "foo bar", options); + return (MatchPhraseQueryBuilder) mpq.asBuilder(); + } + + public void testToString() { + final Source source = new Source(1, 1, StringUtils.EMPTY); + final MatchPhraseQuery mpq = new MatchPhraseQuery(source, "eggplant", "foo bar"); + assertEquals("MatchPhraseQuery@1:2[eggplant:foo bar]", mpq.toString()); + } +} diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index 071b44b65e98c..8f83936eca3e0 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -101,7 +101,7 @@ setup: - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} # Testing for the entire function set isn't feasible, so we just check that we return the correct count as an approximation. 
- - length: {esql.functions: 146} # check the "sister" test below for a likely update to the same esql.functions length check + - length: {esql.functions: 147} # check the "sister" test below for a likely update to the same esql.functions length check --- "Basic ESQL usage output (telemetry) non-snapshot version": @@ -180,4 +180,4 @@ setup: - match: {esql.functions.cos: $functions_cos} - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - - length: {esql.functions: 142} # check the "sister" test above for a likely update to the same esql.functions length check + - length: {esql.functions: 143} # check the "sister" test above for a likely update to the same esql.functions length check