From f4f3c356fbb79a2256ce374ad686b51844e6e18a Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Sat, 16 Aug 2025 02:12:45 +0200 Subject: [PATCH 01/16] Adding Contains function --- .../src/main/resources/semantic_text.csv-spec | 15 ++ .../src/main/resources/string.csv-spec | 86 +++++++++ .../scalar/string/ContainsEvaluator.java | 154 ++++++++++++++++ .../function/EsqlFunctionRegistry.java | 2 + .../scalar/ScalarFunctionWritables.java | 2 + .../function/scalar/string/Contains.java | 144 +++++++++++++++ .../function/scalar/string/ContainsTests.java | 171 ++++++++++++++++++ 7 files changed, 574 insertions(+) create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv-spec index 891405c70ffbb..d372673881752 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/semantic_text.csv-spec @@ -515,6 +515,21 @@ result:keyword live long and prosper ; +contains +required_capability: semantic_text_field_caps + +FROM semantic_text METADATA _id +| EVAL result = contains(semantic_text_field, "all") +| KEEP _id, result +| SORT _id +; + +_id:keyword | result:boolean +1 | false +2 | true +3 | false +; + endsWith required_capability: semantic_text_field_caps diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index 2602378c64615..f3654010d4471 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -388,6 +388,92 @@ emp_no:integer | name:keyword 10010 | null ; + +contains#[skip:-9.2.99,reason:new string function added in 9.3] +// tag::contains[] +ROW a = "hello" +| EVAL a_ll = CONTAINS(a, "ll") +// end::contains[] +; + +// tag::contains-result[] +a:keyword | a_ll:boolean +hello | true +// end::contains-result[] +; + +containsFail#[skip:-9.2.99,reason:new string function added in 9.3] +row a = "hello" | eval a_ll = contains(a, "int"); + +a:keyword | a_ll:boolean +hello | false +; + +containsLongerSubstr#[skip:-9.2.99,reason:new string function added in 9.3] +row a = "hello" | eval a_ll = contains(a, "farewell"); + +a:keyword | a_ll:boolean +hello | false +; + +containsSame#[skip:-9.2.99,reason:new string function added in 9.3] +row a = "hello" | eval a_ll = contains(a, "hello"); + +a:keyword | a_ll:boolean +hello | true +; + +containsWithSubstring#[skip:-9.2.99,reason:new string function added in 9.3] +from employees | where emp_no <= 10010 | eval f_s = substring(last_name, 2) | eval f_l = contains(last_name, f_s) | keep emp_no, last_name, f_s, f_l; +ignoreOrder:true + +emp_no:integer | last_name:keyword | f_s:keyword | f_l:boolean +10001 | Facello | acello | true +10002 | Simmel | immel | true +10003 | Bamford | amford | true +10004 | Koblick | oblick | true +10005 | Maliniak | aliniak | true +10006 | Preusig | reusig | true +10007 | Zielinski | ielinski | true +10008 | Kalloufi | alloufi | true +10009 | Peac | eac | true +10010 | Piveteau | iveteau | true +; + +containsUtf16Emoji#[skip:-9.2.99,reason:new string function added in 9.3] +row a = "🐱Meow!🐶Woof!" | eval f_s = substring(a, 2) | eval f_l = contains(a, f_s); + +a:keyword | f_s:keyword | f_l:boolean +🐱Meow!🐶Woof! | Meow!🐶Woof! | true +; + +containsNestedCase#[skip:-9.2.99,reason:new string function added in 9.3] +row a = "hello" | eval a_ll = CASE(contains(a, "ll"), "success","fail"); + +a:keyword | a_ll:keyword +hello | success +; + +containsNestSubstring#[skip:-9.2.99,reason:new string function added in 9.3] +row a = "hello" | eval a_ll = contains(substring(a, 2), "ll"); + +a:keyword | a_ll:boolean +hello | true +; + +containsWarnings#[skip:-9.2.99,reason:new string function added in 9.3] + +from hosts | where host=="epsilon" | eval l1 = contains(host_group, "ate"), l2 = contains(description, "ate") | keep l1, l2; +ignoreOrder:true +warning:Line 1:80: evaluation of [contains(description, \"ate\")] failed, treating result as null. Only first 20 failures recorded. +warning:Line 1:80: java.lang.IllegalArgumentException: single-value function encountered multi-value + +l1:boolean | l2:boolean +true | null +true | null +null | false +; + // Note: no matches in MV returned in diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsEvaluator.java new file mode 100644 index 0000000000000..cd4d40dabaec2 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsEvaluator.java @@ -0,0 +1,154 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import java.lang.IllegalArgumentException; +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.BooleanVector; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Contains}. + * This class is generated. Edit {@code EvaluatorImplementer} instead. + */ +public final class ContainsEvaluator implements EvalOperator.ExpressionEvaluator { + private final Source source; + + private final EvalOperator.ExpressionEvaluator str; + + private final EvalOperator.ExpressionEvaluator substr; + + private final DriverContext driverContext; + + private Warnings warnings; + + public ContainsEvaluator(Source source, EvalOperator.ExpressionEvaluator str, + EvalOperator.ExpressionEvaluator substr, DriverContext driverContext) { + this.source = source; + this.str = str; + this.substr = substr; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (BytesRefBlock strBlock = (BytesRefBlock) str.eval(page)) { + try (BytesRefBlock substrBlock = (BytesRefBlock) substr.eval(page)) { + BytesRefVector strVector = strBlock.asVector(); + if (strVector == null) { + return eval(page.getPositionCount(), strBlock, substrBlock); + } + BytesRefVector substrVector = substrBlock.asVector(); + if (substrVector == null) { + return eval(page.getPositionCount(), strBlock, substrBlock); + } + return eval(page.getPositionCount(), strVector, substrVector).asBlock(); + } + } + } + + public BooleanBlock eval(int positionCount, BytesRefBlock strBlock, BytesRefBlock substrBlock) { + try(BooleanBlock.Builder result = driverContext.blockFactory().newBooleanBlockBuilder(positionCount)) { + BytesRef strScratch = new BytesRef(); + BytesRef substrScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + if (strBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (strBlock.getValueCount(p) != 1) { + if (strBlock.getValueCount(p) > 1) { + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + if (substrBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (substrBlock.getValueCount(p) != 1) { + if (substrBlock.getValueCount(p) > 1) { + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + result.appendBoolean(Contains.process(strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch), substrBlock.getBytesRef(substrBlock.getFirstValueIndex(p), substrScratch))); + } + return result.build(); + } + } + + public BooleanVector eval(int positionCount, BytesRefVector strVector, + BytesRefVector substrVector) { + try(BooleanVector.FixedBuilder result = driverContext.blockFactory().newBooleanVectorFixedBuilder(positionCount)) { + BytesRef strScratch = new BytesRef(); + BytesRef substrScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + result.appendBoolean(p, Contains.process(strVector.getBytesRef(p, strScratch), substrVector.getBytesRef(p, substrScratch))); + } + return result.build(); + } + } + + @Override + public String toString() { + return "ContainsEvaluator[" + "str=" + str + ", substr=" + substr + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(str, substr); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory str; + + private final EvalOperator.ExpressionEvaluator.Factory substr; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory str, + EvalOperator.ExpressionEvaluator.Factory substr) { + this.source = source; + this.str = str; + this.substr = substr; + } + + @Override + public ContainsEvaluator get(DriverContext context) { + return new ContainsEvaluator(source, str.get(context), substr.get(context), context); + } + + @Override + public String toString() { + return "ContainsEvaluator[" + "str=" + str + ", substr=" + substr + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 0eca67f625121..7c0d8df82880a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -161,6 +161,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Contains; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim; @@ -366,6 +367,7 @@ private static FunctionDefinition[][] functions() { def(BitLength.class, BitLength::new, "bit_length"), def(ByteLength.class, ByteLength::new, "byte_length"), def(Concat.class, Concat::new, "concat"), + def(Contains.class, Contains::new, "contains"), def(EndsWith.class, EndsWith::new, "ends_with"), def(Hash.class, Hash::new, "hash"), def(LTrim.class, LTrim::new, "ltrim"), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java index 684e685e2eafb..8d142d84e3c49 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java @@ -35,6 +35,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce; import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Contains; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left; @@ -69,6 +70,7 @@ public static List getNamedWriteables() { entries.add(CIDRMatch.ENTRY); entries.add(Coalesce.ENTRY); entries.add(Concat.ENTRY); + entries.add(Contains.ENTRY); entries.add(E.ENTRY); entries.add(EndsWith.ENTRY); entries.add(FromAggregateMetricDouble.ENTRY); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java new file mode 100644 index 0000000000000..5e48e1eec23a5 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java @@ -0,0 +1,144 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; + +/** + * Contains function, given a string 'a' and a substring 'b', returns true if the substring 'b' is in 'a'. + */ +public class Contains extends EsqlScalarFunction implements OptionalArgument { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Contains", Contains::new); + + private final Expression str; + private final Expression substr; + + @FunctionInfo(returnType = "boolean", description = """ + Returns true if a keyword substring is within another string. + Returns false if the substring cannot be found. + """, examples = @Example(file = "string", tag = "contains")) + public Contains( + Source source, + @Param(name = "string", type = { "keyword", "text" }, description = "An input string") Expression str, + @Param( + name = "substring", + type = { "keyword", "text" }, + description = "A substring to find in the input string" + ) Expression substr + ) { + super(source, Arrays.asList(str, substr)); + this.str = str; + this.substr = substr; + } + + private Contains(StreamInput in) throws IOException { + this( + Source.readFrom((PlanStreamInput) in), + in.readNamedWriteable(Expression.class), + in.readNamedWriteable(Expression.class) + ); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(str); + out.writeNamedWriteable(substr); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + public DataType dataType() { + return DataType.INTEGER; + } + + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + + TypeResolution resolution = isString(str, sourceText(), FIRST); + if (resolution.unresolved()) { + return resolution; + } + resolution = isString(substr, sourceText(), SECOND); + if (resolution.unresolved()) { + return resolution; + } + + return TypeResolution.TYPE_RESOLVED; + } + + @Override + public boolean foldable() { + return str.foldable() && substr.foldable(); + } + + @Evaluator + static boolean process(BytesRef str, BytesRef substr) { + if (str == null || substr == null || str.length < substr.length) { + return false; + } + String utf8ToString = str.utf8ToString(); + return utf8ToString.contains(substr.utf8ToString()); + } + + @Override + public Expression replaceChildren(List newChildren) { + return new Contains(source(), newChildren.get(0), newChildren.get(1)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Contains::new, str, substr); + } + + @Override + public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + ExpressionEvaluator.Factory strExpr = toEvaluator.apply(str); + ExpressionEvaluator.Factory substrExpr = toEvaluator.apply(substr); + + return new ContainsEvaluator.Factory(source(), strExpr, substrExpr); + } + + Expression str() { + return str; + } + + Expression substr() { + return substr; + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java new file mode 100644 index 0000000000000..31641bc9a55db --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java @@ -0,0 +1,171 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.function.Function; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; + +/** + * Tests for {@link Locate} function. + */ +public class ContainsTests extends AbstractScalarFunctionTestCase { + public ContainsTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List suppliers = new ArrayList<>(); + for (DataType strType : DataType.stringTypes()) { + for (DataType substrType : DataType.stringTypes()) { + suppliers.add( + supplier( + "", + strType, + substrType, + () -> randomRealisticUnicodeOfCodepointLength(10), + str -> randomRealisticUnicodeOfCodepointLength(2), + String::contains + ) + ); + suppliers.add( + supplier( + "exact match ", + strType, + substrType, + () -> randomRealisticUnicodeOfCodepointLength(10), + str -> str, + (str, substr) -> true + ) + ); + } + } + + // Here follows some non-randomized examples that we want to cover on every run + suppliers.add(supplier("a tiger", "a t", true)); + suppliers.add(supplier("a tiger", "a", true)); + suppliers.add(supplier("界世", "界", true)); + suppliers.add(supplier("a tiger", "er",true)); + suppliers.add(supplier("a tiger", "r", true)); + suppliers.add(supplier("界世", "世", true)); + suppliers.add(supplier("a tiger", "ti", true)); + suppliers.add(supplier("a tiger", "ige", true)); + suppliers.add(supplier("世界世", "界", true)); + suppliers.add(supplier("a tiger", "tigers", true)); + suppliers.add(supplier("a tiger", "ipa", true)); + suppliers.add(supplier("世界世", "\uD83C\uDF0D", true)); + + suppliers.add(supplier("a ti𠜎er", "𠜎er", true)); + suppliers.add(supplier("a ti𠜎er", "i𠜎e", true)); + suppliers.add(supplier("a ti𠜎er", "ti𠜎", true)); + suppliers.add(supplier("a ti𠜎er", "er", true)); + suppliers.add(supplier("a ti𠜎er", "r", true)); + suppliers.add(supplier("a ti𠜎er", "a ti𠜎er", true)); + // prefix + suppliers.add(supplier("𠜎abc", "𠜎", true)); + suppliers.add(supplier("𠜎 abc", "𠜎 ", true)); + suppliers.add(supplier("𠜎𠜎𠜎abc", "𠜎𠜎𠜎", true)); + suppliers.add(supplier("𠜎𠜎𠜎 abc", "𠜎𠜎𠜎 ", true)); + suppliers.add(supplier(" 𠜎𠜎𠜎 abc", " 𠜎𠜎𠜎 ", true)); + suppliers.add(supplier("𠜎 𠜎 𠜎 abc", "𠜎 𠜎 𠜎 ", true)); + // suffix + suppliers.add(supplier("abc𠜎", "𠜎", true)); + suppliers.add(supplier("abc 𠜎", " 𠜎", true)); + suppliers.add(supplier("abc𠜎𠜎𠜎", "𠜎𠜎𠜎", true)); + suppliers.add(supplier("abc 𠜎𠜎𠜎", " 𠜎𠜎𠜎", true)); + suppliers.add(supplier("abc𠜎𠜎𠜎 ", "𠜎𠜎𠜎 ", true)); + // out of range + suppliers.add(supplier("𠜎a ti𠜎er", "𠜎a ti𠜎ers", false)); + suppliers.add(supplier("a ti𠜎er", "aa ti𠜎er", false)); + suppliers.add(supplier("abc𠜎𠜎", "𠜎𠜎𠜎", false)); + + assert "🐱".length() == 2 && "🐶".length() == 2; + assert "🐱".codePointCount(0, 2) == 1 && "🐶".codePointCount(0, 2) == 1; + assert "🐱".getBytes(StandardCharsets.UTF_8).length == 4 && "🐶".getBytes(StandardCharsets.UTF_8).length == 4; + suppliers.add(supplier("🐱Meow!🐶Woof!", "🐱Meow!🐶Woof!", true)); + suppliers.add(supplier("🐱Meow!🐶Woof!", "Meow!🐶Woof!", true)); + suppliers.add(supplier("🐱Meow!🐶Woof!", "eow!🐶Woof!", true)); + + return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(true, suppliers); + } + + @Override + protected Expression build(Source source, List args) { + return new Contains(source, args.get(0), args.get(1)); + } + + private static TestCaseSupplier supplier(String str, String substr, @Nullable Boolean expectedValue) { + String name = String.format(Locale.ROOT, "\"%s\" in \"%s\"", substr, str); + return new TestCaseSupplier( + name, + types(DataType.KEYWORD, DataType.KEYWORD), + () -> testCase(DataType.KEYWORD, DataType.KEYWORD, str, substr, expectedValue) + ); + } + + interface ExpectedValue { + boolean expectedValue(String str, String substr); + } + + private static TestCaseSupplier supplier( + String name, + DataType strType, + DataType substrType, + Supplier strValueSupplier, + Function substrValueSupplier, + ExpectedValue expectedValue + ) { + List types = types(strType, substrType); + return new TestCaseSupplier(name + TestCaseSupplier.nameFromTypes(types), types, () -> { + String str = strValueSupplier.get(); + String substr = substrValueSupplier.apply(str); + return testCase(strType, substrType, str, substr, expectedValue.expectedValue(str, substr)); + }); + } + + private static String expectedToString() { + return "ContainsEvaluator[str=Attribute[channel=0], substr=Attribute[channel=1]]"; + } + + private static List types(DataType firstType, DataType secondType) { + List types = new ArrayList<>(); + types.add(firstType); + types.add(secondType); + return types; + } + + private static TestCaseSupplier.TestCase testCase( + DataType strType, + DataType substrType, + String str, + String substr, + Boolean expectedValue + ) { + List values = new ArrayList<>(); + values.add(new TestCaseSupplier.TypedData(str == null ? null : new BytesRef(str), strType, "str")); + values.add(new TestCaseSupplier.TypedData(substr == null ? null : new BytesRef(substr), substrType, "substr")); + return new TestCaseSupplier.TestCase(values, expectedToString(), DataType.INTEGER, equalTo(expectedValue)); + } +} From 372b4a5e5e9b99bcec9d853606f48526dcbe395f Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 16 Aug 2025 00:29:14 +0000 Subject: [PATCH 02/16] [CI] Auto commit changes from spotless --- .../expression/function/scalar/string/Contains.java | 12 ++---------- .../function/scalar/string/ContainsTests.java | 4 ++-- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java index 5e48e1eec23a5..bf5bebf11d350 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java @@ -48,11 +48,7 @@ public class Contains extends EsqlScalarFunction implements OptionalArgument { public Contains( Source source, @Param(name = "string", type = { "keyword", "text" }, description = "An input string") Expression str, - @Param( - name = "substring", - type = { "keyword", "text" }, - description = "A substring to find in the input string" - ) Expression substr + @Param(name = "substring", type = { "keyword", "text" }, description = "A substring to find in the input string") Expression substr ) { super(source, Arrays.asList(str, substr)); this.str = str; @@ -60,11 +56,7 @@ public Contains( } private Contains(StreamInput in) throws IOException { - this( - Source.readFrom((PlanStreamInput) in), - in.readNamedWriteable(Expression.class), - in.readNamedWriteable(Expression.class) - ); + this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class)); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java index 31641bc9a55db..7bab5ca946ae7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java @@ -67,7 +67,7 @@ public static Iterable parameters() { suppliers.add(supplier("a tiger", "a t", true)); suppliers.add(supplier("a tiger", "a", true)); suppliers.add(supplier("界世", "界", true)); - suppliers.add(supplier("a tiger", "er",true)); + suppliers.add(supplier("a tiger", "er", true)); suppliers.add(supplier("a tiger", "r", true)); suppliers.add(supplier("界世", "世", true)); suppliers.add(supplier("a tiger", "ti", true)); @@ -121,7 +121,7 @@ private static TestCaseSupplier supplier(String str, String substr, @Nullable Bo return new TestCaseSupplier( name, types(DataType.KEYWORD, DataType.KEYWORD), - () -> testCase(DataType.KEYWORD, DataType.KEYWORD, str, substr, expectedValue) + () -> testCase(DataType.KEYWORD, DataType.KEYWORD, str, substr, expectedValue) ); } From 5db278ad0cf5c6297c5899f88246c151dedc4ea2 Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Sat, 16 Aug 2025 03:47:04 +0200 Subject: [PATCH 03/16] Fix Contains function return type to BOOLEAN --- .../esql/expression/function/scalar/string/Contains.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java index bf5bebf11d350..c6ff994273567 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java @@ -43,8 +43,7 @@ public class Contains extends EsqlScalarFunction implements OptionalArgument { @FunctionInfo(returnType = "boolean", description = """ Returns true if a keyword substring is within another string. - Returns false if the substring cannot be found. - """, examples = @Example(file = "string", tag = "contains")) + Returns false if the substring cannot be found.""", examples = @Example(file = "string", tag = "contains")) public Contains( Source source, @Param(name = "string", type = { "keyword", "text" }, description = "An input string") Expression str, @@ -73,7 +72,7 @@ public String getWriteableName() { @Override public DataType dataType() { - return DataType.INTEGER; + return DataType.BOOLEAN; } @Override From 690d18f924306c1ccd1ac01f1c0e2fd59c0c4dba Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Sat, 16 Aug 2025 03:58:01 +0200 Subject: [PATCH 04/16] Fix incorrect expected results in Contains function tests --- .../expression/function/scalar/string/ContainsTests.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java index 7bab5ca946ae7..69953aee8afed 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java @@ -73,9 +73,9 @@ public static Iterable parameters() { suppliers.add(supplier("a tiger", "ti", true)); suppliers.add(supplier("a tiger", "ige", true)); suppliers.add(supplier("世界世", "界", true)); - suppliers.add(supplier("a tiger", "tigers", true)); - suppliers.add(supplier("a tiger", "ipa", true)); - suppliers.add(supplier("世界世", "\uD83C\uDF0D", true)); + suppliers.add(supplier("a tiger", "tigers", false)); + suppliers.add(supplier("a tiger", "ipa", false)); + suppliers.add(supplier("世界世", "\uD83C\uDF0D", false)); suppliers.add(supplier("a ti𠜎er", "𠜎er", true)); suppliers.add(supplier("a ti𠜎er", "i𠜎e", true)); From dc3e82ef790768cb3f0d1eba56e4db6973991acd Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Sat, 16 Aug 2025 04:02:27 +0200 Subject: [PATCH 05/16] Remove redundant assertions in Contains function tests --- .../esql/expression/function/scalar/string/ContainsTests.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java index 69953aee8afed..e1ac0fa72b624 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java @@ -101,9 +101,6 @@ public static Iterable parameters() { suppliers.add(supplier("a ti𠜎er", "aa ti𠜎er", false)); suppliers.add(supplier("abc𠜎𠜎", "𠜎𠜎𠜎", false)); - assert "🐱".length() == 2 && "🐶".length() == 2; - assert "🐱".codePointCount(0, 2) == 1 && "🐶".codePointCount(0, 2) == 1; - assert "🐱".getBytes(StandardCharsets.UTF_8).length == 4 && "🐶".getBytes(StandardCharsets.UTF_8).length == 4; suppliers.add(supplier("🐱Meow!🐶Woof!", "🐱Meow!🐶Woof!", true)); suppliers.add(supplier("🐱Meow!🐶Woof!", "Meow!🐶Woof!", true)); suppliers.add(supplier("🐱Meow!🐶Woof!", "eow!🐶Woof!", true)); From d0f39577cb84ccec08c99f9c62a923007fcb1e4c Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 16 Aug 2025 02:08:28 +0000 Subject: [PATCH 06/16] [CI] Auto commit changes from spotless --- .../esql/expression/function/scalar/string/ContainsTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java index e1ac0fa72b624..b3136b2ade1f8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java @@ -18,7 +18,6 @@ import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.Locale; From bafbc034d63ce20e5091d5afd7dadd2d0011370f Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Sat, 16 Aug 2025 11:26:56 +0200 Subject: [PATCH 07/16] Fix ContainsTests return type to match BOOLEAN --- .../esql/expression/function/scalar/string/ContainsTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java index b3136b2ade1f8..48856a572ad9b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ContainsTests.java @@ -162,6 +162,6 @@ private static TestCaseSupplier.TestCase testCase( List values = new ArrayList<>(); values.add(new TestCaseSupplier.TypedData(str == null ? null : new BytesRef(str), strType, "str")); values.add(new TestCaseSupplier.TypedData(substr == null ? null : new BytesRef(substr), substrType, "substr")); - return new TestCaseSupplier.TestCase(values, expectedToString(), DataType.INTEGER, equalTo(expectedValue)); + return new TestCaseSupplier.TestCase(values, expectedToString(), DataType.BOOLEAN, equalTo(expectedValue)); } } From c45ca9de312cc1a63feb909db2851173f935862d Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Sat, 16 Aug 2025 13:16:19 +0200 Subject: [PATCH 08/16] Add generated documentation for the CONTAINS function in ESQL --- .../functions/description/contains.md | 6 ++ .../_snippets/functions/examples/contains.md | 14 +++ .../_snippets/functions/layout/contains.md | 23 +++++ .../functions/parameters/contains.md | 10 +++ .../_snippets/functions/types/contains.md | 11 +++ .../esql/images/functions/contains.svg | 1 + .../kibana/definition/functions/contains.json | 85 +++++++++++++++++++ .../esql/kibana/docs/functions/contains.md | 10 +++ 8 files changed, 160 insertions(+) create mode 100644 docs/reference/query-languages/esql/_snippets/functions/description/contains.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/examples/contains.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/layout/contains.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/parameters/contains.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/types/contains.md create mode 100644 docs/reference/query-languages/esql/images/functions/contains.svg create mode 100644 docs/reference/query-languages/esql/kibana/definition/functions/contains.json create mode 100644 docs/reference/query-languages/esql/kibana/docs/functions/contains.md diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/contains.md b/docs/reference/query-languages/esql/_snippets/functions/description/contains.md new file mode 100644 index 0000000000000..ac1a44db4795c --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/contains.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Returns true if a keyword substring is within another string. Returns false if the substring cannot be found. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/contains.md b/docs/reference/query-languages/esql/_snippets/functions/examples/contains.md new file mode 100644 index 0000000000000..4ec94fbb7e216 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/contains.md @@ -0,0 +1,14 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Example** + +```esql +ROW a = "hello" +| EVAL a_ll = CONTAINS(a, "ll") +``` + +| a:keyword | a_ll:boolean | +| --- | --- | +| hello | true | + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/contains.md b/docs/reference/query-languages/esql/_snippets/functions/layout/contains.md new file mode 100644 index 0000000000000..8fcedc8306f13 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/contains.md @@ -0,0 +1,23 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +## `CONTAINS` [esql-contains] + +**Syntax** + +:::{image} ../../../images/functions/contains.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/contains.md +::: + +:::{include} ../description/contains.md +::: + +:::{include} ../types/contains.md +::: + +:::{include} ../examples/contains.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/contains.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/contains.md new file mode 100644 index 0000000000000..6f67af31f149a --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/contains.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Parameters** + +`string` +: An input string + +`substring` +: A substring to find in the input string + diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/contains.md b/docs/reference/query-languages/esql/_snippets/functions/types/contains.md new file mode 100644 index 0000000000000..06a2658eca86d --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/types/contains.md @@ -0,0 +1,11 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Supported types** + +| string | substring | result | +| --- | --- | --- | +| keyword | keyword | boolean | +| keyword | text | boolean | +| text | keyword | boolean | +| text | text | boolean | + diff --git a/docs/reference/query-languages/esql/images/functions/contains.svg b/docs/reference/query-languages/esql/images/functions/contains.svg new file mode 100644 index 0000000000000..4264b83f783e3 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/contains.svg @@ -0,0 +1 @@ +CONTAINS(string,substring) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/contains.json b/docs/reference/query-languages/esql/kibana/definition/functions/contains.json new file mode 100644 index 0000000000000..f3b67bf57b627 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/contains.json @@ -0,0 +1,85 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "contains", + "description" : "Returns true if a keyword substring is within another string.\nReturns false if the substring cannot be found.", + "signatures" : [ + { + "params" : [ + { + "name" : "string", + "type" : "keyword", + "optional" : false, + "description" : "An input string" + }, + { + "name" : "substring", + "type" : "keyword", + "optional" : false, + "description" : "A substring to find in the input string" + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "string", + "type" : "keyword", + "optional" : false, + "description" : "An input string" + }, + { + "name" : "substring", + "type" : "text", + "optional" : false, + "description" : "A substring to find in the input string" + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "string", + "type" : "text", + "optional" : false, + "description" : "An input string" + }, + { + "name" : "substring", + "type" : "keyword", + "optional" : false, + "description" : "A substring to find in the input string" + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "string", + "type" : "text", + "optional" : false, + "description" : "An input string" + }, + { + "name" : "substring", + "type" : "text", + "optional" : false, + "description" : "A substring to find in the input string" + } + ], + "variadic" : false, + "returnType" : "boolean" + } + ], + "examples" : [ + "ROW a = \"hello\"\n| EVAL a_ll = CONTAINS(a, \"ll\")" + ], + "preview" : false, + "snapshot_only" : false +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/contains.md b/docs/reference/query-languages/esql/kibana/docs/functions/contains.md new file mode 100644 index 0000000000000..c67c3f697358c --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/contains.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### CONTAINS +Returns true if a keyword substring is within another string. +Returns false if the substring cannot be found. + +```esql +ROW a = "hello" +| EVAL a_ll = CONTAINS(a, "ll") +``` From 11fef88e9ab4fc8e8e562ae4d574d041c162e7c7 Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Sat, 16 Aug 2025 14:21:44 +0200 Subject: [PATCH 09/16] Update variable name in CONTAINS function examples and docs with nicer value --- .../esql/_snippets/functions/examples/contains.md | 4 ++-- .../esql/kibana/definition/functions/contains.json | 2 +- .../query-languages/esql/kibana/docs/functions/contains.md | 2 +- .../esql/qa/testFixtures/src/main/resources/string.csv-spec | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/contains.md b/docs/reference/query-languages/esql/_snippets/functions/examples/contains.md index 4ec94fbb7e216..db28c1413cb83 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/examples/contains.md +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/contains.md @@ -4,10 +4,10 @@ ```esql ROW a = "hello" -| EVAL a_ll = CONTAINS(a, "ll") +| EVAL has_ll = CONTAINS(a, "ll") ``` -| a:keyword | a_ll:boolean | +| a:keyword | has_ll:boolean | | --- | --- | | hello | true | diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/contains.json b/docs/reference/query-languages/esql/kibana/definition/functions/contains.json index f3b67bf57b627..72e01fc5483ff 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/contains.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/contains.json @@ -78,7 +78,7 @@ } ], "examples" : [ - "ROW a = \"hello\"\n| EVAL a_ll = CONTAINS(a, \"ll\")" + "ROW a = \"hello\"\n| EVAL has_ll = CONTAINS(a, \"ll\")" ], "preview" : false, "snapshot_only" : false diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/contains.md b/docs/reference/query-languages/esql/kibana/docs/functions/contains.md index c67c3f697358c..f380fd07ba546 100644 --- a/docs/reference/query-languages/esql/kibana/docs/functions/contains.md +++ b/docs/reference/query-languages/esql/kibana/docs/functions/contains.md @@ -6,5 +6,5 @@ Returns false if the substring cannot be found. ```esql ROW a = "hello" -| EVAL a_ll = CONTAINS(a, "ll") +| EVAL has_ll = CONTAINS(a, "ll") ``` diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index f3654010d4471..083da3c1a6ef1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -392,12 +392,12 @@ emp_no:integer | name:keyword contains#[skip:-9.2.99,reason:new string function added in 9.3] // tag::contains[] ROW a = "hello" -| EVAL a_ll = CONTAINS(a, "ll") +| EVAL has_ll = CONTAINS(a, "ll") // end::contains[] ; // tag::contains-result[] -a:keyword | a_ll:boolean +a:keyword | has_ll:boolean hello | true // end::contains-result[] ; From b3262cc86202b02ac96223d7e963bcbdf2e902cf Mon Sep 17 00:00:00 2001 From: Michael Bischoff Date: Sat, 16 Aug 2025 15:34:14 +0200 Subject: [PATCH 10/16] Update docs/changelog/133016.yaml --- docs/changelog/133016.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/133016.yaml diff --git a/docs/changelog/133016.yaml b/docs/changelog/133016.yaml new file mode 100644 index 0000000000000..8b407ef405d35 --- /dev/null +++ b/docs/changelog/133016.yaml @@ -0,0 +1,5 @@ +pr: 133016 +summary: Adding Contains ESQL String function +area: ES|QL +type: feature +issues: [] From bb28101cc4ec4867f6760225eebbac4ce6bb2a9d Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Tue, 19 Aug 2025 08:33:05 +0200 Subject: [PATCH 11/16] Add reference under string functions for the CONTAINS function documentation in ESQL --- .../esql/functions-operators/string-functions.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/reference/query-languages/esql/functions-operators/string-functions.md b/docs/reference/query-languages/esql/functions-operators/string-functions.md index 4d053709f0ed3..09ca490293aba 100644 --- a/docs/reference/query-languages/esql/functions-operators/string-functions.md +++ b/docs/reference/query-languages/esql/functions-operators/string-functions.md @@ -21,6 +21,9 @@ mapped_pages: :::{include} ../_snippets/functions/layout/concat.md ::: +:::{include} ../_snippets/functions/layout/contains.md +::: + :::{include} ../_snippets/functions/layout/ends_with.md ::: From 3afaf81aef7ff3b5ac7d127023a4778fa96147cb Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Tue, 19 Aug 2025 08:33:05 +0200 Subject: [PATCH 12/16] Add reference under string functions for the CONTAINS function documentation in ESQL --- .../query-languages/esql/_snippets/lists/string-functions.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/reference/query-languages/esql/_snippets/lists/string-functions.md b/docs/reference/query-languages/esql/_snippets/lists/string-functions.md index 612f59d7056a6..98552c19685d6 100644 --- a/docs/reference/query-languages/esql/_snippets/lists/string-functions.md +++ b/docs/reference/query-languages/esql/_snippets/lists/string-functions.md @@ -1,6 +1,7 @@ * [`BIT_LENGTH`](../../functions-operators/string-functions.md#esql-bit_length) * [`BYTE_LENGTH`](../../functions-operators/string-functions.md#esql-byte_length) * [`CONCAT`](../../functions-operators/string-functions.md#esql-concat) +* [`CONTAINS`](../../functions-operators/string-functions.md#esql-contains) * [`ENDS_WITH`](../../functions-operators/string-functions.md#esql-ends_with) * [`FROM_BASE64`](../../functions-operators/string-functions.md#esql-from_base64) * [`HASH`](../../functions-operators/string-functions.md#esql-hash) From 22f89a239fe528c5ee4182ade8b7e90a380daeb3 Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Tue, 19 Aug 2025 17:10:42 +0200 Subject: [PATCH 13/16] Removed skip markers from CONTAINS function test cases - wrongfully assumed it was a marker for documentation. But it's used to ignore the test cases. Re-enabling the tests and fixing them. --- .../src/main/resources/string.csv-spec | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index 083da3c1a6ef1..058e4e4e9ad6e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -389,7 +389,7 @@ emp_no:integer | name:keyword ; -contains#[skip:-9.2.99,reason:new string function added in 9.3] +contains // tag::contains[] ROW a = "hello" | EVAL has_ll = CONTAINS(a, "ll") @@ -402,28 +402,28 @@ hello | true // end::contains-result[] ; -containsFail#[skip:-9.2.99,reason:new string function added in 9.3] +containsFail row a = "hello" | eval a_ll = contains(a, "int"); a:keyword | a_ll:boolean hello | false ; -containsLongerSubstr#[skip:-9.2.99,reason:new string function added in 9.3] +containsLongerSubstr row a = "hello" | eval a_ll = contains(a, "farewell"); a:keyword | a_ll:boolean hello | false ; -containsSame#[skip:-9.2.99,reason:new string function added in 9.3] +containsSame row a = "hello" | eval a_ll = contains(a, "hello"); a:keyword | a_ll:boolean hello | true ; -containsWithSubstring#[skip:-9.2.99,reason:new string function added in 9.3] +containsWithSubstring from employees | where emp_no <= 10010 | eval f_s = substring(last_name, 2) | eval f_l = contains(last_name, f_s) | keep emp_no, last_name, f_s, f_l; ignoreOrder:true @@ -440,33 +440,33 @@ emp_no:integer | last_name:keyword | f_s:keyword | f_l:boolean 10010 | Piveteau | iveteau | true ; -containsUtf16Emoji#[skip:-9.2.99,reason:new string function added in 9.3] +containsUtf16Emoji row a = "🐱Meow!🐶Woof!" | eval f_s = substring(a, 2) | eval f_l = contains(a, f_s); a:keyword | f_s:keyword | f_l:boolean 🐱Meow!🐶Woof! | Meow!🐶Woof! | true ; -containsNestedCase#[skip:-9.2.99,reason:new string function added in 9.3] +containsNestedCase row a = "hello" | eval a_ll = CASE(contains(a, "ll"), "success","fail"); a:keyword | a_ll:keyword hello | success ; -containsNestSubstring#[skip:-9.2.99,reason:new string function added in 9.3] +containsNestSubstring row a = "hello" | eval a_ll = contains(substring(a, 2), "ll"); a:keyword | a_ll:boolean hello | true ; -containsWarnings#[skip:-9.2.99,reason:new string function added in 9.3] +containsWarnings from hosts | where host=="epsilon" | eval l1 = contains(host_group, "ate"), l2 = contains(description, "ate") | keep l1, l2; ignoreOrder:true -warning:Line 1:80: evaluation of [contains(description, \"ate\")] failed, treating result as null. Only first 20 failures recorded. -warning:Line 1:80: java.lang.IllegalArgumentException: single-value function encountered multi-value +warning:Line 1:82: evaluation of [contains(description, \"ate\")] failed, treating result as null. Only first 20 failures recorded. +warning:Line 1:82: java.lang.IllegalArgumentException: single-value function encountered multi-value l1:boolean | l2:boolean true | null From 7385f0206906208cd04e1500a9a4bace4c862d68 Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Tue, 19 Aug 2025 18:54:51 +0200 Subject: [PATCH 14/16] Readding skipped markers as else the test gets run in the backwards compatibility test environment. - not sure how to test it as, I feel like the version should be main on main / dev. Doing the dance for now. --- .../src/main/resources/string.csv-spec | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index 058e4e4e9ad6e..d0cddc9d1fbce 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -389,7 +389,7 @@ emp_no:integer | name:keyword ; -contains +contains#[skip:-9.2.99,reason:new string function added in 9.3] // tag::contains[] ROW a = "hello" | EVAL has_ll = CONTAINS(a, "ll") @@ -402,28 +402,28 @@ hello | true // end::contains-result[] ; -containsFail +containsFail#[skip:-9.2.99,reason:new string function added in 9.3] row a = "hello" | eval a_ll = contains(a, "int"); a:keyword | a_ll:boolean hello | false ; -containsLongerSubstr +containsLongerSubstr#[skip:-9.2.99,reason:new string function added in 9.3] row a = "hello" | eval a_ll = contains(a, "farewell"); a:keyword | a_ll:boolean hello | false ; -containsSame +containsSame#[skip:-9.2.99,reason:new string function added in 9.3] row a = "hello" | eval a_ll = contains(a, "hello"); a:keyword | a_ll:boolean hello | true ; -containsWithSubstring +containsWithSubstring#[skip:-9.2.99,reason:new string function added in 9.3] from employees | where emp_no <= 10010 | eval f_s = substring(last_name, 2) | eval f_l = contains(last_name, f_s) | keep emp_no, last_name, f_s, f_l; ignoreOrder:true @@ -440,28 +440,28 @@ emp_no:integer | last_name:keyword | f_s:keyword | f_l:boolean 10010 | Piveteau | iveteau | true ; -containsUtf16Emoji +containsUtf16Emoji#[skip:-9.2.99,reason:new string function added in 9.3] row a = "🐱Meow!🐶Woof!" | eval f_s = substring(a, 2) | eval f_l = contains(a, f_s); a:keyword | f_s:keyword | f_l:boolean 🐱Meow!🐶Woof! | Meow!🐶Woof! | true ; -containsNestedCase +containsNestedCase#[skip:-9.2.99,reason:new string function added in 9.3] row a = "hello" | eval a_ll = CASE(contains(a, "ll"), "success","fail"); a:keyword | a_ll:keyword hello | success ; -containsNestSubstring +containsNestSubstring#[skip:-9.2.99,reason:new string function added in 9.3] row a = "hello" | eval a_ll = contains(substring(a, 2), "ll"); a:keyword | a_ll:boolean hello | true ; -containsWarnings +containsWarnings#[skip:-9.2.99,reason:new string function added in 9.3] from hosts | where host=="epsilon" | eval l1 = contains(host_group, "ate"), l2 = contains(description, "ate") | keep l1, l2; ignoreOrder:true From 85a442f527aa859d953f08cfacb0d3d2b8f1dd9a Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Sun, 24 Aug 2025 15:02:36 +0200 Subject: [PATCH 15/16] Add `required_capability: fn_contains` to trigger CONTAINS test cases replacing skip logic. --- .../src/main/resources/string.csv-spec | 27 ++++++++++++------- .../xpack/esql/action/EsqlCapabilities.java | 5 ++++ .../function/scalar/string/Contains.java | 3 +-- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index d0cddc9d1fbce..7110b299e13a2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -389,7 +389,8 @@ emp_no:integer | name:keyword ; -contains#[skip:-9.2.99,reason:new string function added in 9.3] +contains +required_capability: fn_contains // tag::contains[] ROW a = "hello" | EVAL has_ll = CONTAINS(a, "ll") @@ -402,28 +403,32 @@ hello | true // end::contains-result[] ; -containsFail#[skip:-9.2.99,reason:new string function added in 9.3] +containsFail +required_capability: fn_contains row a = "hello" | eval a_ll = contains(a, "int"); a:keyword | a_ll:boolean hello | false ; -containsLongerSubstr#[skip:-9.2.99,reason:new string function added in 9.3] +containsLongerSubstr +required_capability: fn_contains row a = "hello" | eval a_ll = contains(a, "farewell"); a:keyword | a_ll:boolean hello | false ; -containsSame#[skip:-9.2.99,reason:new string function added in 9.3] +containsSame +required_capability: fn_contains row a = "hello" | eval a_ll = contains(a, "hello"); a:keyword | a_ll:boolean hello | true ; -containsWithSubstring#[skip:-9.2.99,reason:new string function added in 9.3] +containsWithSubstring +required_capability: fn_contains from employees | where emp_no <= 10010 | eval f_s = substring(last_name, 2) | eval f_l = contains(last_name, f_s) | keep emp_no, last_name, f_s, f_l; ignoreOrder:true @@ -440,28 +445,32 @@ emp_no:integer | last_name:keyword | f_s:keyword | f_l:boolean 10010 | Piveteau | iveteau | true ; -containsUtf16Emoji#[skip:-9.2.99,reason:new string function added in 9.3] +containsUtf16Emoji +required_capability: fn_contains row a = "🐱Meow!🐶Woof!" | eval f_s = substring(a, 2) | eval f_l = contains(a, f_s); a:keyword | f_s:keyword | f_l:boolean 🐱Meow!🐶Woof! | Meow!🐶Woof! | true ; -containsNestedCase#[skip:-9.2.99,reason:new string function added in 9.3] +containsNestedCase +required_capability: fn_contains row a = "hello" | eval a_ll = CASE(contains(a, "ll"), "success","fail"); a:keyword | a_ll:keyword hello | success ; -containsNestSubstring#[skip:-9.2.99,reason:new string function added in 9.3] +containsNestSubstring +required_capability: fn_contains row a = "hello" | eval a_ll = contains(substring(a, 2), "ll"); a:keyword | a_ll:boolean hello | true ; -containsWarnings#[skip:-9.2.99,reason:new string function added in 9.3] +containsWarnings +required_capability: fn_contains from hosts | where host=="epsilon" | eval l1 = contains(host_group, "ate"), l2 = contains(description, "ate") | keep l1, l2; ignoreOrder:true diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 3480212fb5b06..18b928169fab6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -190,6 +190,11 @@ public enum Cap { */ FN_REVERSE_GRAPHEME_CLUSTERS, + /** + * Support for function {@code CONTAINS}. Done in #133016. + */ + FN_CONTAINS, + /** * Support for function {@code CBRT}. Done in #108574. */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java index c6ff994273567..660af41bfb154 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java @@ -103,8 +103,7 @@ static boolean process(BytesRef str, BytesRef substr) { if (str == null || substr == null || str.length < substr.length) { return false; } - String utf8ToString = str.utf8ToString(); - return utf8ToString.contains(substr.utf8ToString()); + return str.utf8ToString().contains(substr.utf8ToString()); } @Override From 60ebb6bf2cf5955f228be4030d006ec15f5e0396 Mon Sep 17 00:00:00 2001 From: mjmbischoff Date: Sun, 24 Aug 2025 16:09:25 +0200 Subject: [PATCH 16/16] Remove null checks for `str` and `substr` in CONTAINS function logic --- .../xpack/esql/expression/function/scalar/string/Contains.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java index 660af41bfb154..fd7f914f2a44c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Contains.java @@ -100,7 +100,7 @@ public boolean foldable() { @Evaluator static boolean process(BytesRef str, BytesRef substr) { - if (str == null || substr == null || str.length < substr.length) { + if (str.length < substr.length) { return false; } return str.utf8ToString().contains(substr.utf8ToString());