diff --git a/docs/changelog/133016.yaml b/docs/changelog/133016.yaml new file mode 100644 index 0000000000000..8b407ef405d35 --- /dev/null +++ b/docs/changelog/133016.yaml @@ -0,0 +1,5 @@ +pr: 133016 +summary: Adding Contains ESQL String function +area: ES|QL +type: feature +issues: [] diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/contains.md b/docs/reference/query-languages/esql/_snippets/functions/description/contains.md new file mode 100644 index 0000000000000..ac1a44db4795c --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/contains.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Returns true if a keyword substring is within another string. Returns false if the substring cannot be found. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/contains.md b/docs/reference/query-languages/esql/_snippets/functions/examples/contains.md new file mode 100644 index 0000000000000..db28c1413cb83 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/contains.md @@ -0,0 +1,14 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Example** + +```esql +ROW a = "hello" +| EVAL has_ll = CONTAINS(a, "ll") +``` + +| a:keyword | has_ll:boolean | +| --- | --- | +| hello | true | + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/contains.md b/docs/reference/query-languages/esql/_snippets/functions/layout/contains.md new file mode 100644 index 0000000000000..8fcedc8306f13 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/contains.md @@ -0,0 +1,23 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. 
+ +## `CONTAINS` [esql-contains] + +**Syntax** + +:::{image} ../../../images/functions/contains.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/contains.md +::: + +:::{include} ../description/contains.md +::: + +:::{include} ../types/contains.md +::: + +:::{include} ../examples/contains.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/contains.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/contains.md new file mode 100644 index 0000000000000..6f67af31f149a --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/contains.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Parameters** + +`string` +: An input string + +`substring` +: A substring to find in the input string + diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/contains.md b/docs/reference/query-languages/esql/_snippets/functions/types/contains.md new file mode 100644 index 0000000000000..06a2658eca86d --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/types/contains.md @@ -0,0 +1,11 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. 
+ +**Supported types** + +| string | substring | result | +| --- | --- | --- | +| keyword | keyword | boolean | +| keyword | text | boolean | +| text | keyword | boolean | +| text | text | boolean | + diff --git a/docs/reference/query-languages/esql/_snippets/lists/string-functions.md b/docs/reference/query-languages/esql/_snippets/lists/string-functions.md index 612f59d7056a6..98552c19685d6 100644 --- a/docs/reference/query-languages/esql/_snippets/lists/string-functions.md +++ b/docs/reference/query-languages/esql/_snippets/lists/string-functions.md @@ -1,6 +1,7 @@ * [`BIT_LENGTH`](../../functions-operators/string-functions.md#esql-bit_length) * [`BYTE_LENGTH`](../../functions-operators/string-functions.md#esql-byte_length) * [`CONCAT`](../../functions-operators/string-functions.md#esql-concat) +* [`CONTAINS`](../../functions-operators/string-functions.md#esql-contains) * [`ENDS_WITH`](../../functions-operators/string-functions.md#esql-ends_with) * [`FROM_BASE64`](../../functions-operators/string-functions.md#esql-from_base64) * [`HASH`](../../functions-operators/string-functions.md#esql-hash) diff --git a/docs/reference/query-languages/esql/functions-operators/string-functions.md b/docs/reference/query-languages/esql/functions-operators/string-functions.md index 4d053709f0ed3..09ca490293aba 100644 --- a/docs/reference/query-languages/esql/functions-operators/string-functions.md +++ b/docs/reference/query-languages/esql/functions-operators/string-functions.md @@ -21,6 +21,9 @@ mapped_pages: :::{include} ../_snippets/functions/layout/concat.md ::: +:::{include} ../_snippets/functions/layout/contains.md +::: + :::{include} ../_snippets/functions/layout/ends_with.md ::: diff --git a/docs/reference/query-languages/esql/images/functions/contains.svg b/docs/reference/query-languages/esql/images/functions/contains.svg new file mode 100644 index 0000000000000..4264b83f783e3 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/contains.svg @@ -0,0 +1 
@@ +CONTAINS(string,substring) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/contains.json b/docs/reference/query-languages/esql/kibana/definition/functions/contains.json new file mode 100644 index 0000000000000..72e01fc5483ff --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/contains.json @@ -0,0 +1,85 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "contains", + "description" : "Returns true if a keyword substring is within another string.\nReturns false if the substring cannot be found.", + "signatures" : [ + { + "params" : [ + { + "name" : "string", + "type" : "keyword", + "optional" : false, + "description" : "An input string" + }, + { + "name" : "substring", + "type" : "keyword", + "optional" : false, + "description" : "A substring to find in the input string" + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "string", + "type" : "keyword", + "optional" : false, + "description" : "An input string" + }, + { + "name" : "substring", + "type" : "text", + "optional" : false, + "description" : "A substring to find in the input string" + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "string", + "type" : "text", + "optional" : false, + "description" : "An input string" + }, + { + "name" : "substring", + "type" : "keyword", + "optional" : false, + "description" : "A substring to find in the input string" + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "string", + "type" : "text", + "optional" : false, + "description" : "An input string" + }, + { + "name" : "substring", + "type" : "text", + "optional" : false, + "description" : "A substring to find in the input string" + } + ], + "variadic" : false, + "returnType" : 
"boolean" + } + ], + "examples" : [ + "ROW a = \"hello\"\n| EVAL has_ll = CONTAINS(a, \"ll\")" + ], + "preview" : false, + "snapshot_only" : false +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/contains.md b/docs/reference/query-languages/esql/kibana/docs/functions/contains.md new file mode 100644 index 0000000000000..f380fd07ba546 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/contains.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### CONTAINS +Returns true if a keyword substring is within another string. +Returns false if the substring cannot be found. + +```esql +ROW a = "hello" +| EVAL has_ll = CONTAINS(a, "ll") +``` diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv-spec index 891405c70ffbb..d372673881752 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv-spec @@ -515,6 +515,22 @@ result:keyword live long and prosper ; +contains +required_capability: semantic_text_field_caps +required_capability: fn_contains + +FROM semantic_text METADATA _id +| EVAL result = contains(semantic_text_field, "all") +| KEEP _id, result +| SORT _id +; + +_id:keyword | result:boolean +1 | false +2 | true +3 | false +; + endsWith required_capability: semantic_text_field_caps diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index 2602378c64615..7110b299e13a2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -388,6 +388,101 @@ emp_no:integer | name:keyword 10010 | null ; + +contains +required_capability: fn_contains +//
tag::contains[] +ROW a = "hello" +| EVAL has_ll = CONTAINS(a, "ll") +// end::contains[] +; + +// tag::contains-result[] +a:keyword | has_ll:boolean +hello | true +// end::contains-result[] +; + +containsFail +required_capability: fn_contains +row a = "hello" | eval a_ll = contains(a, "int"); + +a:keyword | a_ll:boolean +hello | false +; + +containsLongerSubstr +required_capability: fn_contains +row a = "hello" | eval a_ll = contains(a, "farewell"); + +a:keyword | a_ll:boolean +hello | false +; + +containsSame +required_capability: fn_contains +row a = "hello" | eval a_ll = contains(a, "hello"); + +a:keyword | a_ll:boolean +hello | true +; + +containsWithSubstring +required_capability: fn_contains +from employees | where emp_no <= 10010 | eval f_s = substring(last_name, 2) | eval f_l = contains(last_name, f_s) | keep emp_no, last_name, f_s, f_l; +ignoreOrder:true + +emp_no:integer | last_name:keyword | f_s:keyword | f_l:boolean +10001 | Facello | acello | true +10002 | Simmel | immel | true +10003 | Bamford | amford | true +10004 | Koblick | oblick | true +10005 | Maliniak | aliniak | true +10006 | Preusig | reusig | true +10007 | Zielinski | ielinski | true +10008 | Kalloufi | alloufi | true +10009 | Peac | eac | true +10010 | Piveteau | iveteau | true +; + +containsUtf16Emoji +required_capability: fn_contains +row a = "🐱Meow!🐶Woof!" | eval f_s = substring(a, 2) | eval f_l = contains(a, f_s); + +a:keyword | f_s:keyword | f_l:boolean +🐱Meow!🐶Woof! | Meow!🐶Woof!
| true +; + +containsNestedCase +required_capability: fn_contains +row a = "hello" | eval a_ll = CASE(contains(a, "ll"), "success","fail"); + +a:keyword | a_ll:keyword +hello | success +; + +containsNestSubstring +required_capability: fn_contains +row a = "hello" | eval a_ll = contains(substring(a, 2), "ll"); + +a:keyword | a_ll:boolean +hello | true +; + +containsWarnings +required_capability: fn_contains + +from hosts | where host=="epsilon" | eval l1 = contains(host_group, "ate"), l2 = contains(description, "ate") | keep l1, l2; +ignoreOrder:true +warning:Line 1:82: evaluation of [contains(description, \"ate\")] failed, treating result as null. Only first 20 failures recorded. +warning:Line 1:82: java.lang.IllegalArgumentException: single-value function encountered multi-value + +l1:boolean | l2:boolean +true | null +true | null +null | false +; + // Note: no matches in MV returned in diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsEvaluator.java new file mode 100644 index 0000000000000..cd4d40dabaec2 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsEvaluator.java @@ -0,0 +1,154 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. 
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import java.lang.IllegalArgumentException;
+import java.lang.Override;
+import java.lang.String;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BooleanBlock;
+import org.elasticsearch.compute.data.BooleanVector;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.BytesRefVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.compute.operator.Warnings;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+
+/**
+ * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Contains}.
+ * This class is generated. Edit {@code EvaluatorImplementer} instead.
+ */
+public final class ContainsEvaluator implements EvalOperator.ExpressionEvaluator {
+  // Query source of the CONTAINS call; only read when building Warnings (line, column, text).
+  private final Source source;
+
+  // Child evaluator producing the string that is searched.
+  private final EvalOperator.ExpressionEvaluator str;
+
+  // Child evaluator producing the substring that is searched for.
+  private final EvalOperator.ExpressionEvaluator substr;
+
+  private final DriverContext driverContext;
+
+  // Created on first use by warnings(); stays null while no multi-value input is seen.
+  private Warnings warnings;
+
+  public ContainsEvaluator(Source source, EvalOperator.ExpressionEvaluator str,
+      EvalOperator.ExpressionEvaluator substr, DriverContext driverContext) {
+    this.source = source;
+    this.str = str;
+    this.substr = substr;
+    this.driverContext = driverContext;
+  }
+
+  /**
+   * Evaluates both child expressions against {@code page} and computes one boolean per position.
+   * When either child result has no vector view ({@code asVector()} returns null) the
+   * block overload is used; otherwise the vector overload is used and its result wrapped as a block.
+   * Both intermediate blocks are closed by the try-with-resources statements.
+   */
+  @Override
+  public Block eval(Page page) {
+    try (BytesRefBlock strBlock = (BytesRefBlock) str.eval(page)) {
+      try (BytesRefBlock substrBlock = (BytesRefBlock) substr.eval(page)) {
+        BytesRefVector strVector = strBlock.asVector();
+        if (strVector == null) {
+          return eval(page.getPositionCount(), strBlock, substrBlock);
+        }
+        BytesRefVector substrVector = substrBlock.asVector();
+        if (substrVector == null) {
+          return eval(page.getPositionCount(), strBlock, substrBlock);
+        }
+        return eval(page.getPositionCount(), strVector, substrVector).asBlock();
+      }
+    }
+  }
+
+  /**
+   * Block path. For each position the result is null when either input is null or does not hold
+   * exactly one value; a position holding more than one value additionally registers an
+   * {@link IllegalArgumentException} warning. Otherwise the result is {@link Contains#process}.
+   */
+  public BooleanBlock eval(int positionCount, BytesRefBlock strBlock, BytesRefBlock substrBlock) {
+    try(BooleanBlock.Builder result = driverContext.blockFactory().newBooleanBlockBuilder(positionCount)) {
+      // Scratch instances handed to getBytesRef on every position instead of allocating per row.
+      BytesRef strScratch = new BytesRef();
+      BytesRef substrScratch = new BytesRef();
+      position: for (int p = 0; p < positionCount; p++) {
+        if (strBlock.isNull(p)) {
+          result.appendNull();
+          continue position;
+        }
+        if (strBlock.getValueCount(p) != 1) {
+          if (strBlock.getValueCount(p) > 1) {
+            warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value"));
+          }
+          result.appendNull();
+          continue position;
+        }
+        if (substrBlock.isNull(p)) {
+          result.appendNull();
+          continue position;
+        }
+        if (substrBlock.getValueCount(p) != 1) {
+          if (substrBlock.getValueCount(p) > 1) {
+            warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value"));
+          }
+          result.appendNull();
+          continue position;
+        }
+        result.appendBoolean(Contains.process(strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch), substrBlock.getBytesRef(substrBlock.getFirstValueIndex(p), substrScratch)));
+      }
+      return result.build();
+    }
+  }
+
+  /**
+   * Vector path. Performs no null or value-count checks and calls {@link Contains#process}
+   * once per position, writing the result at the same index.
+   */
+  public BooleanVector eval(int positionCount, BytesRefVector strVector,
+      BytesRefVector substrVector) {
+    try(BooleanVector.FixedBuilder result = driverContext.blockFactory().newBooleanVectorFixedBuilder(positionCount)) {
+      BytesRef strScratch = new BytesRef();
+      BytesRef substrScratch = new BytesRef();
+      position: for (int p = 0; p < positionCount; p++) {
+        result.appendBoolean(p, Contains.process(strVector.getBytesRef(p, strScratch), substrVector.getBytesRef(p, substrScratch)));
+      }
+      return result.build();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "ContainsEvaluator[" + "str=" + str + ", substr=" + substr + "]";
+  }
+
+  // Releases both child evaluators.
+  @Override
+  public void close() {
+    Releasables.closeExpectNoException(str, substr);
+  }
+
+  // Lazily builds the Warnings sink from the driver's warnings mode and this call's source position.
+  private Warnings warnings() {
+    if (warnings == null) {
+      this.warnings = Warnings.createWarnings(
+              driverContext.warningsMode(),
+              source.source().getLineNumber(),
+              source.source().getColumnNumber(),
+              source.text()
+          );
+    }
+    return warnings;
+  }
+
+  /**
+   * Creates a {@link ContainsEvaluator} for a given {@link DriverContext}, instantiating both
+   * child evaluators from their own factories with that same context.
+   */
+  static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
+    private final Source source;
+
+    private final EvalOperator.ExpressionEvaluator.Factory str;
+
+    private final EvalOperator.ExpressionEvaluator.Factory substr;
+
+    public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory str,
+        EvalOperator.ExpressionEvaluator.Factory substr) {
+      this.source = source;
+      this.str = str;
+      this.substr = substr;
+    }
+
+    @Override
+    public ContainsEvaluator get(DriverContext context) {
+      return new ContainsEvaluator(source, str.get(context), substr.get(context), context);
+    }
+
+    @Override
+    public String toString() {
+      return "ContainsEvaluator[" + "str=" + str + ", substr=" + substr + "]";
+    }
+  }
+}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index cdef9f8c33cbd..94c6c415ed952 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -190,6 +190,11 @@ public enum Cap { */ FN_REVERSE_GRAPHEME_CLUSTERS, + /** + * Support for function {@code CONTAINS}. Done in #133016. + */ + FN_CONTAINS, + /** * Support for function {@code CBRT}. Done in #108574.
*/ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 9d6372702d842..ccb3f94c1e311 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -162,6 +162,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Contains; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim; @@ -369,6 +370,7 @@ private static FunctionDefinition[][] functions() { def(BitLength.class, BitLength::new, "bit_length"), def(ByteLength.class, ByteLength::new, "byte_length"), def(Concat.class, Concat::new, "concat"), + def(Contains.class, Contains::new, "contains"), def(EndsWith.class, EndsWith::new, "ends_with"), def(Hash.class, Hash::new, "hash"), def(LTrim.class, LTrim::new, "ltrim"), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java index 4e0d28fd7d6a6..978f0218a798f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java @@ -36,6 +36,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce; import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Contains; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left; @@ -70,6 +71,7 @@ public static List getNamedWriteables() { entries.add(CIDRMatch.ENTRY); entries.add(Coalesce.ENTRY); entries.add(Concat.ENTRY); + entries.add(Contains.ENTRY); entries.add(E.ENTRY); entries.add(EndsWith.ENTRY); entries.add(FromAggregateMetricDouble.ENTRY); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java new file mode 100644 index 0000000000000..fd7f914f2a44c --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java @@ -0,0 +1,134 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; + +/** + * Contains function, given a string 'a' and a substring 'b', returns true if the substring 'b' is in 'a'. + */ +public class Contains extends EsqlScalarFunction implements OptionalArgument { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Contains", Contains::new); + + private final Expression str; + private final Expression substr; + + @FunctionInfo(returnType = "boolean", description = """ + Returns true if a keyword substring is within another string. 
+ Returns false if the substring cannot be found.""", examples = @Example(file = "string", tag = "contains")) + public Contains( + Source source, + @Param(name = "string", type = { "keyword", "text" }, description = "An input string") Expression str, + @Param(name = "substring", type = { "keyword", "text" }, description = "A substring to find in the input string") Expression substr + ) { + super(source, Arrays.asList(str, substr)); + this.str = str; + this.substr = substr; + } + + private Contains(StreamInput in) throws IOException { + this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(str); + out.writeNamedWriteable(substr); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + public DataType dataType() { + return DataType.BOOLEAN; + } + + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + + TypeResolution resolution = isString(str, sourceText(), FIRST); + if (resolution.unresolved()) { + return resolution; + } + resolution = isString(substr, sourceText(), SECOND); + if (resolution.unresolved()) { + return resolution; + } + + return TypeResolution.TYPE_RESOLVED; + } + + @Override + public boolean foldable() { + return str.foldable() && substr.foldable(); + } + + @Evaluator + static boolean process(BytesRef str, BytesRef substr) { + if (str.length < substr.length) { + return false; + } + return str.utf8ToString().contains(substr.utf8ToString()); + } + + @Override + public Expression replaceChildren(List newChildren) { + return new Contains(source(), newChildren.get(0), newChildren.get(1)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Contains::new, str, substr); + } + + @Override + public 
ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + ExpressionEvaluator.Factory strExpr = toEvaluator.apply(str); + ExpressionEvaluator.Factory substrExpr = toEvaluator.apply(substr); + + return new ContainsEvaluator.Factory(source(), strExpr, substrExpr); + } + + Expression str() { + return str; + } + + Expression substr() { + return substr; + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java new file mode 100644 index 0000000000000..48856a572ad9b --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java @@ -0,0 +1,167 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.function.Function; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; + +/** + * Tests for {@link Locate} function. 
+ */ +public class ContainsTests extends AbstractScalarFunctionTestCase { + public ContainsTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List suppliers = new ArrayList<>(); + for (DataType strType : DataType.stringTypes()) { + for (DataType substrType : DataType.stringTypes()) { + suppliers.add( + supplier( + "", + strType, + substrType, + () -> randomRealisticUnicodeOfCodepointLength(10), + str -> randomRealisticUnicodeOfCodepointLength(2), + String::contains + ) + ); + suppliers.add( + supplier( + "exact match ", + strType, + substrType, + () -> randomRealisticUnicodeOfCodepointLength(10), + str -> str, + (str, substr) -> true + ) + ); + } + } + + // Here follows some non-randomized examples that we want to cover on every run + suppliers.add(supplier("a tiger", "a t", true)); + suppliers.add(supplier("a tiger", "a", true)); + suppliers.add(supplier("็•Œไธ–", "็•Œ", true)); + suppliers.add(supplier("a tiger", "er", true)); + suppliers.add(supplier("a tiger", "r", true)); + suppliers.add(supplier("็•Œไธ–", "ไธ–", true)); + suppliers.add(supplier("a tiger", "ti", true)); + suppliers.add(supplier("a tiger", "ige", true)); + suppliers.add(supplier("ไธ–็•Œไธ–", "็•Œ", true)); + suppliers.add(supplier("a tiger", "tigers", false)); + suppliers.add(supplier("a tiger", "ipa", false)); + suppliers.add(supplier("ไธ–็•Œไธ–", "\uD83C\uDF0D", false)); + + suppliers.add(supplier("a ti๐ œŽer", "๐ œŽer", true)); + suppliers.add(supplier("a ti๐ œŽer", "i๐ œŽe", true)); + suppliers.add(supplier("a ti๐ œŽer", "ti๐ œŽ", true)); + suppliers.add(supplier("a ti๐ œŽer", "er", true)); + suppliers.add(supplier("a ti๐ œŽer", "r", true)); + suppliers.add(supplier("a ti๐ œŽer", "a ti๐ œŽer", true)); + // prefix + suppliers.add(supplier("๐ œŽabc", "๐ œŽ", true)); + suppliers.add(supplier("๐ œŽ abc", "๐ œŽ ", true)); + suppliers.add(supplier("๐ œŽ๐ œŽ๐ œŽabc", "๐ œŽ๐ œŽ๐ œŽ", 
true)); + suppliers.add(supplier("๐ œŽ๐ œŽ๐ œŽ abc", "๐ œŽ๐ œŽ๐ œŽ ", true)); + suppliers.add(supplier(" ๐ œŽ๐ œŽ๐ œŽ abc", " ๐ œŽ๐ œŽ๐ œŽ ", true)); + suppliers.add(supplier("๐ œŽ ๐ œŽ ๐ œŽ abc", "๐ œŽ ๐ œŽ ๐ œŽ ", true)); + // suffix + suppliers.add(supplier("abc๐ œŽ", "๐ œŽ", true)); + suppliers.add(supplier("abc ๐ œŽ", " ๐ œŽ", true)); + suppliers.add(supplier("abc๐ œŽ๐ œŽ๐ œŽ", "๐ œŽ๐ œŽ๐ œŽ", true)); + suppliers.add(supplier("abc ๐ œŽ๐ œŽ๐ œŽ", " ๐ œŽ๐ œŽ๐ œŽ", true)); + suppliers.add(supplier("abc๐ œŽ๐ œŽ๐ œŽ ", "๐ œŽ๐ œŽ๐ œŽ ", true)); + // out of range + suppliers.add(supplier("๐ œŽa ti๐ œŽer", "๐ œŽa ti๐ œŽers", false)); + suppliers.add(supplier("a ti๐ œŽer", "aa ti๐ œŽer", false)); + suppliers.add(supplier("abc๐ œŽ๐ œŽ", "๐ œŽ๐ œŽ๐ œŽ", false)); + + suppliers.add(supplier("๐ŸฑMeow!๐ŸถWoof!", "๐ŸฑMeow!๐ŸถWoof!", true)); + suppliers.add(supplier("๐ŸฑMeow!๐ŸถWoof!", "Meow!๐ŸถWoof!", true)); + suppliers.add(supplier("๐ŸฑMeow!๐ŸถWoof!", "eow!๐ŸถWoof!", true)); + + return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(true, suppliers); + } + + @Override + protected Expression build(Source source, List args) { + return new Contains(source, args.get(0), args.get(1)); + } + + private static TestCaseSupplier supplier(String str, String substr, @Nullable Boolean expectedValue) { + String name = String.format(Locale.ROOT, "\"%s\" in \"%s\"", substr, str); + return new TestCaseSupplier( + name, + types(DataType.KEYWORD, DataType.KEYWORD), + () -> testCase(DataType.KEYWORD, DataType.KEYWORD, str, substr, expectedValue) + ); + } + + interface ExpectedValue { + boolean expectedValue(String str, String substr); + } + + private static TestCaseSupplier supplier( + String name, + DataType strType, + DataType substrType, + Supplier strValueSupplier, + Function substrValueSupplier, + ExpectedValue expectedValue + ) { + List types = types(strType, substrType); + return new TestCaseSupplier(name + TestCaseSupplier.nameFromTypes(types), types, () -> { + 
String str = strValueSupplier.get(); + String substr = substrValueSupplier.apply(str); + return testCase(strType, substrType, str, substr, expectedValue.expectedValue(str, substr)); + }); + } + + private static String expectedToString() { + return "ContainsEvaluator[str=Attribute[channel=0], substr=Attribute[channel=1]]"; + } + + private static List types(DataType firstType, DataType secondType) { + List types = new ArrayList<>(); + types.add(firstType); + types.add(secondType); + return types; + } + + private static TestCaseSupplier.TestCase testCase( + DataType strType, + DataType substrType, + String str, + String substr, + Boolean expectedValue + ) { + List values = new ArrayList<>(); + values.add(new TestCaseSupplier.TypedData(str == null ? null : new BytesRef(str), strType, "str")); + values.add(new TestCaseSupplier.TypedData(substr == null ? null : new BytesRef(substr), substrType, "substr")); + return new TestCaseSupplier.TestCase(values, expectedToString(), DataType.BOOLEAN, equalTo(expectedValue)); + } +}