diff --git a/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc b/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc index 87748fee4f202..e626e058a4e56 100644 --- a/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc +++ b/docs/reference/esql/esql-process-data-with-dissect-grok.asciidoc @@ -40,7 +40,7 @@ delimiter-based pattern, and extracts the specified keys as columns. For example, the following pattern: [source,txt] ---- -%{clientip} [%{@timestamp}] %{status} +%{clientip} [%{@timestamp}] %{status} ---- matches a log line of this format: @@ -76,8 +76,8 @@ ignore certain fields, append fields, skip over padding, etc. ===== Terminology dissect pattern:: -the set of fields and delimiters describing the textual -format. Also known as a dissection. +the set of fields and delimiters describing the textual +format. Also known as a dissection. The dissection is described using a set of `%{}` sections: `%{a} - %{b} - %{c}` @@ -91,14 +91,14 @@ Any set of characters other than `%{`, `'not }'`, or `}` is a delimiter. key:: + -- -the text between the `%{` and `}`, exclusive of the `?`, `+`, `&` prefixes -and the ordinal suffix. +the text between the `%{` and `}`, exclusive of the `?`, `+`, `&` prefixes +and the ordinal suffix. Examples: -* `%{?aaa}` - the key is `aaa` -* `%{+bbb/3}` - the key is `bbb` -* `%{&ccc}` - the key is `ccc` +* `%{?aaa}` - the key is `aaa` +* `%{+bbb/3}` - the key is `bbb` +* `%{&ccc}` - the key is `ccc` -- [[esql-dissect-examples]] @@ -218,7 +218,7 @@ Putting it together as an {esql} query: [source.merge.styled,esql] ---- -include::{esql-specs}/docs.csv-spec[tag=grokWithEscape] +include::{esql-specs}/docs.csv-spec[tag=grokWithEscapeTripleQuotes] ---- `GROK` adds the following columns to the input table: @@ -239,15 +239,24 @@ with a `\`. 
For example, in the earlier pattern: %{IP:ip} \[%{TIMESTAMP_ISO8601:@timestamp}\] %{GREEDYDATA:status} ---- -In {esql} queries, the backslash character itself is a special character that +In {esql} queries, when using regular double-quoted strings (`"..."`), the backslash character itself is a special character that needs to be escaped with another `\`. For this example, the corresponding {esql} query becomes: [source.merge.styled,esql] ---- include::{esql-specs}/docs.csv-spec[tag=grokWithEscape] ---- + +For this reason, it is generally more convenient to use triple-quoted strings (`"""`) for GROK patterns, +because they do not require backslashes to be escaped. + +[source.merge.styled,esql] +---- +include::{esql-specs}/docs.csv-spec[tag=grokWithEscapeTripleQuotes] +---- ==== + [[esql-grok-patterns]] ===== Grok patterns @@ -318,4 +327,4 @@ as the `GROK` command. The `GROK` command does not support configuring <>, or <>. The `GROK` command is not subject to <>. -// end::grok-limitations[] \ No newline at end of file +// end::grok-limitations[] diff --git a/docs/reference/esql/functions/like.asciidoc b/docs/reference/esql/functions/like.asciidoc index 2298617be5699..a569896bc3c1e 100644 --- a/docs/reference/esql/functions/like.asciidoc +++ b/docs/reference/esql/functions/like.asciidoc @@ -23,4 +23,20 @@ include::{esql-specs}/docs.csv-spec[tag=like] |=== include::{esql-specs}/docs.csv-spec[tag=like-result] |=== + +Matching the exact characters `*` and `?` will require escaping. +The escape character is backslash `\`. Since the backslash is also a special character in string literals, +it requires further escaping. 
+ +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=likeEscapingSingleQuotes] +---- + +To reduce the overhead of escaping, we suggest using triple-quoted strings (`"""`). + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=likeEscapingTripleQuotes] +---- // end::body[] diff --git a/docs/reference/esql/functions/rlike.asciidoc b/docs/reference/esql/functions/rlike.asciidoc index 031594ae403da..f6009b2c49528 100644 --- a/docs/reference/esql/functions/rlike.asciidoc +++ b/docs/reference/esql/functions/rlike.asciidoc @@ -18,4 +18,20 @@ include::{esql-specs}/docs.csv-spec[tag=rlike] |=== include::{esql-specs}/docs.csv-spec[tag=rlike-result] |=== + +Matching special characters (e.g. `.`, `*`, `(`...) will require escaping. +The escape character is backslash `\`. Since the backslash is also a special character in string literals, +it requires further escaping. + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=rlikeEscapingSingleQuotes] +---- + +To reduce the overhead of escaping, we suggest using triple-quoted strings (`"""`). + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=rlikeEscapingTripleQuotes] +---- // end::body[] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index 15fe6853ae491..a9c5a5214f159 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -382,7 +382,7 @@ count:long | languages:integer basicGrok // tag::basicGrok[] ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" -| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}" +| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}""" | KEEP date, ip, email, num // end::basicGrok[] ; @@ -396,7 +396,7 @@ date:keyword | ip:keyword | 
email:keyword | num:keyword grokWithConversionSuffix // tag::grokWithConversionSuffix[] ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" -| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" +| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}""" | KEEP date, ip, email, num // end::grokWithConversionSuffix[] ; @@ -410,7 +410,7 @@ date:keyword | ip:keyword | email:keyword | num:integer grokWithToDatetime // tag::grokWithToDatetime[] ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" -| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" +| GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}""" | KEEP date, ip, email, num | EVAL date = TO_DATETIME(date) // end::grokWithToDatetime[] @@ -436,11 +436,27 @@ ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected" // end::grokWithEscape-result[] ; + +grokWithEscapeTripleQuotes +// tag::grokWithEscapeTripleQuotes[] +ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected" +| GROK a """%{IP:ip} \[%{TIMESTAMP_ISO8601:@timestamp}\] %{GREEDYDATA:status}""" +// end::grokWithEscapeTripleQuotes[] +| KEEP @timestamp +; + +// tag::grokWithEscapeTripleQuotes-result[] +@timestamp:keyword +2023-01-23T12:15:00.000Z +// end::grokWithEscapeTripleQuotes-result[] +; + + grokWithDuplicateFieldNames // tag::grokWithDuplicateFieldNames[] FROM addresses | KEEP city.name, zip_code -| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}" +| GROK zip_code """%{WORD:zip_parts} %{WORD:zip_parts}""" // end::grokWithDuplicateFieldNames[] | SORT city.name ; @@ -456,7 +472,7 @@ Tokyo | 100-7014 | null basicDissect // tag::basicDissect[] ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" -| DISSECT a "%{date} - %{msg} - %{ip}" +| DISSECT a """%{date} - %{msg} - %{ip}""" | KEEP date, msg, ip // end::basicDissect[] ; @@ -470,7 +486,7 @@ date:keyword | msg:keyword | ip:keyword 
dissectWithToDatetime // tag::dissectWithToDatetime[] ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" -| DISSECT a "%{date} - %{msg} - %{ip}" +| DISSECT a """%{date} - %{msg} - %{ip}""" | KEEP date, msg, ip | EVAL date = TO_DATETIME(date) // end::dissectWithToDatetime[] @@ -485,7 +501,7 @@ some text | 127.0.0.1 | 2023-01-23T12:15:00.000Z dissectRightPaddingModifier // tag::dissectRightPaddingModifier[] ROW message="1998-08-10T17:15:42 WARN" -| DISSECT message "%{ts->} %{level}" +| DISSECT message """%{ts->} %{level}""" // end::dissectRightPaddingModifier[] ; @@ -498,7 +514,7 @@ message:keyword | ts:keyword | level:keyword dissectEmptyRightPaddingModifier#[skip:-8.11.2, reason:Support for empty right padding modifiers introduced in 8.11.2] // tag::dissectEmptyRightPaddingModifier[] ROW message="[1998-08-10T17:15:42] [WARN]" -| DISSECT message "[%{ts}]%{->}[%{level}]" +| DISSECT message """[%{ts}]%{->}[%{level}]""" // end::dissectEmptyRightPaddingModifier[] ; @@ -511,7 +527,7 @@ ROW message="[1998-08-10T17:15:42] [WARN]" dissectAppendModifier // tag::dissectAppendModifier[] ROW message="john jacob jingleheimer schmidt" -| DISSECT message "%{+name} %{+name} %{+name} %{+name}" APPEND_SEPARATOR=" " +| DISSECT message """%{+name} %{+name} %{+name} %{+name}""" APPEND_SEPARATOR=" " // end::dissectAppendModifier[] ; @@ -524,7 +540,7 @@ john jacob jingleheimer schmidt|john jacob jingleheimer schmidt dissectAppendWithOrderModifier // tag::dissectAppendWithOrderModifier[] ROW message="john jacob jingleheimer schmidt" -| DISSECT message "%{+name/2} %{+name/4} %{+name/3} %{+name/1}" APPEND_SEPARATOR="," +| DISSECT message """%{+name/2} %{+name/4} %{+name/3} %{+name/1}""" APPEND_SEPARATOR="," // end::dissectAppendWithOrderModifier[] ; @@ -537,7 +553,7 @@ john jacob jingleheimer schmidt|schmidt,john,jingleheimer,jacob dissectNamedSkipKey // tag::dissectNamedSkipKey[] ROW message="1.2.3.4 - - 30/Apr/1998:22:00:52 +0000" -| DISSECT message "%{clientip} %{?ident} %{?auth} 
%{@timestamp}" +| DISSECT message """%{clientip} %{?ident} %{?auth} %{@timestamp}""" // end::dissectNamedSkipKey[] ; @@ -550,7 +566,7 @@ message:keyword | clientip:keyword | @timestamp:keyword docsLike // tag::like[] FROM employees -| WHERE first_name LIKE "?b*" +| WHERE first_name LIKE """?b*""" | KEEP first_name, last_name // end::like[] | SORT first_name @@ -566,7 +582,7 @@ Eberhardt |Terkki docsRlike // tag::rlike[] FROM employees -| WHERE first_name RLIKE ".leja.*" +| WHERE first_name RLIKE """.leja.*""" | KEEP first_name, last_name // end::rlike[] ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index f85a3bb01ad40..1a598ed9bca8b 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -1595,4 +1595,57 @@ emp_no:integer | languages:integer | first_name:keyword 10004 | 5 | ChirstianChirstianChirstianChirstianChirstian ; +likeEscapingSingleQuotes +// tag::likeEscapingSingleQuotes[] +ROW message = "foo * bar" +| WHERE message LIKE "foo \\* bar" +// end::likeEscapingSingleQuotes[] +; + +// tag::likeEscapingSingleQuotes-result[] +message:keyword +foo * bar +// end::likeEscapingSingleQuotes-result[] +; + + +likeEscapingTripleQuotes +// tag::likeEscapingTripleQuotes[] +ROW message = "foo * bar" +| WHERE message LIKE """foo \* bar""" +// end::likeEscapingTripleQuotes[] +; + +// tag::likeEscapingTripleQuotes-result[] +message:keyword +foo * bar +// end::likeEscapingTripleQuotes-result[] +; + +rlikeEscapingSingleQuotes +// tag::rlikeEscapingSingleQuotes[] +ROW message = "foo ( bar" +| WHERE message RLIKE "foo \\( bar" +// end::rlikeEscapingSingleQuotes[] +; + +// tag::rlikeEscapingSingleQuotes-result[] +message:keyword +foo ( bar +// end::rlikeEscapingSingleQuotes-result[] +; + + +rlikeEscapingTripleQuotes +// tag::rlikeEscapingTripleQuotes[] +ROW message = 
"foo ( bar" +| WHERE message RLIKE """foo \( bar""" +// end::rlikeEscapingTripleQuotes[] +; + +// tag::rlikeEscapingTripleQuotes-result[] +message:keyword +foo ( bar +// end::rlikeEscapingTripleQuotes-result[] +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java index 56090b0fb9e32..ad472fe989f5f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RLike.java @@ -16,6 +16,9 @@ import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import java.io.IOException; @@ -27,7 +30,32 @@ public class RLike extends org.elasticsearch.xpack.esql.core.expression.predicate.regex.RLike implements EvaluatorMapper { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "RLike", RLike::new); - public RLike(Source source, Expression value, RLikePattern pattern) { + @FunctionInfo(returnType = "boolean", description = """ + Use `RLIKE` to filter data based on string patterns using + <<regexp-syntax,regular expressions>>. `RLIKE` usually acts on a field placed on + the left-hand side of the operator, but it can also act on a constant (literal) + expression. The right-hand side of the operator represents the pattern.""", detailedDescription = """ + Matching special characters (e.g. `.`, `*`, `(`...) will require escaping. 
+ The escape character is backslash `\\`. Since the backslash is also a special character in string literals, + it requires further escaping. + + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=rlikeEscapingSingleQuotes] + ---- + + To reduce the overhead of escaping, we suggest using triple-quoted strings (`\"\"\"`). + + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=rlikeEscapingTripleQuotes] + ---- + """, examples = @Example(file = "docs", tag = "rlike")) + public RLike( + Source source, + @Param(name = "str", type = { "keyword", "text" }, description = "A literal value.") Expression value, + @Param(name = "pattern", type = { "keyword", "text" }, description = "A regular expression.") RLikePattern pattern + ) { super(source, value, pattern); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java index 325cc0aea4461..99ee2c75806a1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/WildcardLike.java @@ -44,7 +44,23 @@ also act on a constant (literal) expression. The right-hand side of the operator The following wildcard characters are supported: * `*` matches zero or more characters. - * `?` matches one character.""", examples = @Example(file = "docs", tag = "like")) + * `?` matches one character.""", detailedDescription = """ + Matching the exact characters `*` and `?` will require escaping. + The escape character is backslash `\\`. Since the backslash is also a special character in string literals, + it requires further escaping. 
+ + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=likeEscapingSingleQuotes] + ---- + + To reduce the overhead of escaping, we suggest using triple-quoted strings (`\"\"\"`). + + [source.merge.styled,esql] + ---- + include::{esql-specs}/string.csv-spec[tag=likeEscapingTripleQuotes] + ---- + """, examples = @Example(file = "docs", tag = "like")) public WildcardLike( Source source, @Param(name = "str", type = { "keyword", "text" }) Expression left,