From 53f6aefd02ddfcf1759d264147f430581db4973b Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Wed, 30 Apr 2025 09:56:25 +0200 Subject: [PATCH 1/7] ES|QL: add local optimizations for constant_keyword --- .../optimizer/LocalLogicalPlanOptimizer.java | 2 + .../local/ReplaceConstantKeywords.java | 95 +++++++++++++++ .../xpack/esql/stats/SearchContextStats.java | 33 ++++++ .../xpack/esql/stats/SearchStats.java | 8 ++ .../LocalPhysicalPlanOptimizerTests.java | 108 ++++++++++++++++++ .../rest-api-spec/test/esql/30_types.yml | 98 ++++++++++++++++ 6 files changed, 344 insertions(+) create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index 188ea200c89fe..8bbf2814a024e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -12,6 +12,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferIsNotNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferNonNullAggConstraint; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.LocalPropagateEmptyRelation; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceConstantKeywords; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceMissingFieldWithNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceTopNWithLimitAndSort; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; @@ -38,6 +39,7 @@ public class LocalLogicalPlanOptimizer extends ParameterizedRuleExecutor( "Local rewrite", Limiter.ONCE, + new ReplaceConstantKeywords(), new ReplaceTopNWithLimitAndSort(), new ReplaceMissingFieldWithNull(), new InferIsNotNull(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java new file mode 100644 index 0000000000000..974541c22c303 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java @@ -0,0 +1,95 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.logical.local; + +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext; +import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import org.elasticsearch.xpack.esql.plan.logical.Eval; +import org.elasticsearch.xpack.esql.plan.logical.Filter; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.logical.OrderBy; +import org.elasticsearch.xpack.esql.plan.logical.Project; +import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; +import org.elasticsearch.xpack.esql.plan.logical.TopN; +import org.elasticsearch.xpack.esql.rule.ParameterizedRule; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Look for any constant_keyword fields used in the plan and replaces them with their actual value. + */ +public class ReplaceConstantKeywords extends ParameterizedRule { + + @Override + public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLogicalOptimizerContext) { + Map attrToValue = new HashMap<>(); + plan.forEachUp(EsRelation.class, esRelation -> { + if (esRelation.indexMode() == IndexMode.STANDARD) { + for (Attribute attribute : esRelation.output()) { + var val = localLogicalOptimizerContext.searchStats().constantValue(attribute.name()); + if (val != null) { + attrToValue.put( + attribute, + new Alias(attribute.source(), attribute.name(), Literal.of(attribute, val), attribute.id()) + ); + } + } + } + }); + if (attrToValue.isEmpty()) { + return plan; + } + return plan.transformUp(p -> replaceAttributes(p, attrToValue)); + } + + private LogicalPlan replaceAttributes(LogicalPlan plan, Map attrToValue) { + if (plan instanceof EsRelation relation) { + // For any missing field, place an Eval right after the EsRelation to assign constant values to that attribute (using the same + // name + // id!), thus avoiding that InsertFieldExtrations inserts a field extraction later. + // This means that an EsRelation[field1, field2, field3] where field1 and field 3 are constants, will be replaced by + // Project[field1, field2, field3] <- keeps the ordering intact + // \_Eval[field1 = value, field3 = value] + // \_EsRelation[field1, field2, field3] + List relationOutput = relation.output(); + List newProjections = new ArrayList<>(relationOutput.size()); + for (int i = 0, size = relationOutput.size(); i < size; i++) { + Attribute attr = relationOutput.get(i); + Alias alias = attrToValue.get(attr); + newProjections.add(alias == null ? attr : alias); + } + + Eval eval = new Eval(plan.source(), relation, new ArrayList<>(attrToValue.values())); + // This projection is redundant if there's another projection downstream (and no commands depend on the order until we hit it). + return new Project(plan.source(), eval, newProjections.stream().map(NamedExpression::toAttribute).toList()); + } + + if (plan instanceof Eval + || plan instanceof Filter + || plan instanceof OrderBy + || plan instanceof RegexExtract + || plan instanceof TopN) { + return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> { + Alias alias = attrToValue.get(f); + return alias != null ? alias.child() : f; + }); + } + + return plan; + } + +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java index 57ad2d0275a88..016212884e3c9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java @@ -310,6 +310,39 @@ public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value return true; } + public String constantValue(String name) { + String val = null; + for (SearchExecutionContext ctx : contexts) { + MappedFieldType f = ctx.getFieldType(name); + if (f == null) { + return null; + } + if (f instanceof ConstantFieldType cf) { + var fetcher = cf.valueFetcher(ctx, null); + String thisVal = null; + try { + // since the value is a constant, the doc _should_ be irrelevant + List vals = fetcher.fetchValues(null, -1, null); + Object objVal = vals.size() == 1 ? vals.get(0) : null; + // we are considering only string values for now, since this can return "strange" things, + // see IndexModeFieldType + thisVal = objVal instanceof String ? (String) objVal : null; + } catch (IOException iox) {} + + if (thisVal == null) { + // Value not yet set + return null; + } + if (val == null) { + val = thisVal; + } else if (thisVal.equals(val) == false) { + return null; + } + } + } + return val; + } + private interface DocCountTester { Boolean test(LeafReader leafReader) throws IOException; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java index e00de98178832..748ed826836e7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java @@ -39,6 +39,14 @@ public interface SearchStats { boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value); + /** + * Returns the value for a field if it's a constant (eg. a constant_keyword with only one value for the involved indices). + * NULL if the field is not a constant. + */ + default String constantValue(String name) { + return null; + } + /** * When there are no search stats available, for example when there are no search contexts, we have static results. */ diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index e540649e8c602..74d5d6ff41cb0 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -42,6 +42,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -69,6 +70,7 @@ import org.elasticsearch.xpack.esql.plan.physical.FilterExec; import org.elasticsearch.xpack.esql.plan.physical.LimitExec; import org.elasticsearch.xpack.esql.plan.physical.LocalSourceExec; +import org.elasticsearch.xpack.esql.plan.physical.MvExpandExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.plan.physical.ProjectExec; import org.elasticsearch.xpack.esql.plan.physical.TimeSeriesAggregateExec; @@ -150,6 +152,18 @@ public boolean isSingleValue(String field) { } }; + private final SearchStats CONSTANT_K_STATS = new TestSearchStats() { + @Override + public boolean isSingleValue(String field) { + return true; + } + + @Override + public String constantValue(String name) { + return name.startsWith("constant_keyword") ? "foo" : null; + } + }; + @ParametersFactory(argumentFormatting = PARAM_FORMATTING) public static List readScriptSpec() { return settings().stream().map(t -> { @@ -1856,6 +1870,100 @@ public void testPushDownFieldExtractToTimeSeriesSource() { assertTrue(timeSeriesSource.attrs().stream().noneMatch(EsQueryExec::isSourceAttribute)); } + /** + * LimitExec[1000[INTEGER]] + * \_ExchangeExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{f}#6, date{f}#7, date_nanos{f}#8, double{f}#9, + * float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, !semantic_text, + * short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21], false] + * \_ProjectExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{r}#6, date{f}#7, date_nanos{f}#8, double{f}#9, + * float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, + * !semantic_text, short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21]] + * \_FieldExtractExec[!alias_integer, boolean{f}#4, byte{f}#5, date{f}#7, ] + * \_EvalExec[[[66 6f 6f][KEYWORD] AS constant_keyword-foo]] + * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#23], limit[1000], sort[] estimatedRowSize[412] + */ + public void testConstantKeywordWithMatchingFilter() { + String queryText = """ + from test + | where `constant_keyword-foo` == "foo" + """; + var analyzer = makeAnalyzer("mapping-all-types.json"); + var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer); + + var limit = as(plan, LimitExec.class); + var exchange = as(limit.child(), ExchangeExec.class); + var project = as(exchange.child(), ProjectExec.class); + var field = as(project.child(), FieldExtractExec.class); + var eval = as(field.child(), EvalExec.class); + var query = as(eval.child(), EsQueryExec.class); + assertThat(as(query.limit(), Literal.class).value(), is(1000)); + assertNull(query.query()); + assertFalse(field.attributesToExtract().stream().map(NamedExpression::name).anyMatch(x -> x.equals("constant_keyword-foo"))); + } + + /** + * LimitExec[1000[INTEGER]] + * \_ExchangeExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{f}#6, date{f}#7, date_nanos{f}#8, double{f}#9, + * float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, !semantic_text, + * short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21], false] + * \_LocalSourceExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{f}#6, date{f}#7, date_nanos{f}#8, double{f}#9, + * float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, !semantic_text, + * short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21], EMPTY] + */ + public void testConstantKeywordWithNonMatchingFilter() { + String queryText = """ + from test + | where `constant_keyword-foo` == "non-matching" + """; + var analyzer = makeAnalyzer("mapping-all-types.json"); + var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer); + + var limit = as(plan, LimitExec.class); + var exchange = as(limit.child(), ExchangeExec.class); + var source = as(exchange.child(), LocalSourceExec.class); + } + + /** + * LimitExec[1000[INTEGER]] + * \_ExchangeExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1 + * 1, float{f}#12, half_float{f}#13, integer{f}#15, ip{f}#16, keyword{f}#17, long{f}#18, scaled_float{f}#14, + * !semantic_text, short{f}#20, text{f}#21, unsigned_long{f}#19, version{f}#22, wildcard{f}#23], false] + * \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1 + * 1, float{f}#12, half_float{f}#13, integer{f}#15, ip{f}#16, keyword{f}#17, long{f}#18, scaled_float{f}#14, + * !semantic_text, short{f}#20, text{f}#21, unsigned_long{f}#19, version{f}#22, wildcard{f}#23]] + * \_LimitExec[1000[INTEGER]] + * \_FilterExec[constant_keyword-foo{r}#25 == [66 6f 6f][KEYWORD]] + * \_MvExpandExec[constant_keyword-foo{f}#8,constant_keyword-foo{r}#25] + * \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#8, date{f}#9, date_nanos{f}#10, + * double{f}#11, float{f}#12, half_float{f}#13, integer{f}#15, ip{f}#16, keyword{f}#17, long{f}#18, scaled_float{f}#14, + * !semantic_text, short{f}#20, text{f}#21, unsigned_long{f}#19, version{f}#22, wildcard{f}#23]] + * \_FieldExtractExec[!alias_integer, boolean{f}#6, byte{f}#7, date{f}#9, ..] + * \_EvalExec[[[66 6f 6f][KEYWORD] AS constant_keyword-foo]] + * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#26], limit[], sort[] estimatedRowSize[412] + */ + public void testConstantKeywordExpandFilter() { + String queryText = """ + from test + | mv_expand `constant_keyword-foo` + | where `constant_keyword-foo` == "foo" + """; + var analyzer = makeAnalyzer("mapping-all-types.json"); + var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer); + + var limit = as(plan, LimitExec.class); + var exchange = as(limit.child(), ExchangeExec.class); + var project = as(exchange.child(), ProjectExec.class); + var limit2 = as(project.child(), LimitExec.class); + var filter = as(limit2.child(), FilterExec.class); + var expand = as(filter.child(), MvExpandExec.class); + var project2 = as(expand.child(), ProjectExec.class); + var field = as(project2.child(), FieldExtractExec.class); + var eval = as(field.child(), EvalExec.class); + var query = as(eval.child(), EsQueryExec.class); + assertNull(query.query()); + assertFalse(field.attributesToExtract().stream().map(NamedExpression::name).anyMatch(x -> x.equals("constant_keyword-foo"))); + } + private QueryBuilder wrapWithSingleQuery(String query, QueryBuilder inner, String fieldName, Source source) { return FilterTests.singleValueQuery(query, inner, fieldName, source); } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml index 1f9ff72669309..22077823197b4 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml @@ -29,6 +29,26 @@ constant_keyword: - { "index": { } } - { "color": "red" } + - do: + indices.create: + index: test_2 + body: + mappings: + properties: + kind: + type: constant_keyword + value: a different constant + color: + type: keyword + + - do: + bulk: + index: test_2 + refresh: true + body: + - { "index": { } } + - { "color": "blue" } + - do: allowed_warnings_regex: - "No limit defined, adding default limit of \\[.*\\]" @@ -54,6 +74,84 @@ constant_keyword: - length: {values: 1} - match: {values.0.0: 17} + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test | where kind == "wow such constant" | keep color, kind' + - match: {columns.0.name: color} + - match: {columns.0.type: keyword} + - match: { columns.1.name: kind } + - match: { columns.1.type: keyword } + - length: {values: 1} + - match: {values.0.0: red} + - match: {values.0.1: "wow such constant"} + + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test | dissect kind "%{one} %{two} %{three}" | keep one, two, three, kind' + - match: {columns.0.name: one} + - match: {columns.0.type: keyword} + - match: {columns.1.name: two } + - match: {columns.1.type: keyword } + - match: {columns.2.name: three } + - match: {columns.2.type: keyword } + - match: {columns.3.name: kind } + - match: {columns.3.type: keyword } + + - length: {values: 1} + - match: {values.0.0: wow} + - match: {values.0.1: such} + - match: {values.0.2: constant} + - match: {values.0.3: wow such constant} + + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test | stats x = max(kind)' + - match: {columns.0.name: x} + - match: {columns.0.type: keyword} + + - length: {values: 1} + - match: {values.0.0: wow such constant} + + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test* | where kind == "wow such constant" | keep color, kind' + - match: {columns.0.name: color} + - match: {columns.0.type: keyword} + - match: { columns.1.name: kind } + - match: { columns.1.type: keyword } + - length: {values: 1} + - match: {values.0.0: red} + - match: {values.0.1: "wow such constant"} + + + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test* | where kind >= "a" | keep color, kind | sort color' + - match: {columns.0.name: color} + - match: {columns.0.type: keyword} + - match: { columns.1.name: kind } + - match: { columns.1.type: keyword } + - length: {values: 2} + - match: {values.0.0: blue} + - match: {values.0.1: "a different constant"} + - match: {values.1.0: red} + - match: {values.1.1: "wow such constant"} + --- constant_keyword with null value: - do: From 9f3beb14381b2c6a0b5dcab77b5eee9115115b24 Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Wed, 30 Apr 2025 12:26:37 +0200 Subject: [PATCH 2/7] Simplify --- .../local/ReplaceConstantKeywords.java | 44 ++-------- .../LocalPhysicalPlanOptimizerTests.java | 81 +++++++++++-------- 2 files changed, 55 insertions(+), 70 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java index 974541c22c303..a37535542b7c4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java @@ -8,25 +8,21 @@ package org.elasticsearch.xpack.esql.optimizer.rules.logical.local; import org.elasticsearch.index.IndexMode; -import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Filter; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.OrderBy; -import org.elasticsearch.xpack.esql.plan.logical.Project; import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; import org.elasticsearch.xpack.esql.plan.logical.TopN; import org.elasticsearch.xpack.esql.rule.ParameterizedRule; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.Map; /** @@ -36,16 +32,13 @@ public class ReplaceConstantKeywords extends ParameterizedRule attrToValue = new HashMap<>(); + Map attrToValue = new HashMap<>(); plan.forEachUp(EsRelation.class, esRelation -> { if (esRelation.indexMode() == IndexMode.STANDARD) { for (Attribute attribute : esRelation.output()) { var val = localLogicalOptimizerContext.searchStats().constantValue(attribute.name()); if (val != null) { - attrToValue.put( - attribute, - new Alias(attribute.source(), attribute.name(), Literal.of(attribute, val), attribute.id()) - ); + attrToValue.put(attribute, Literal.of(attribute, val)); } } } @@ -56,37 +49,16 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog return plan.transformUp(p -> replaceAttributes(p, attrToValue)); } - private LogicalPlan replaceAttributes(LogicalPlan plan, Map attrToValue) { - if (plan instanceof EsRelation relation) { - // For any missing field, place an Eval right after the EsRelation to assign constant values to that attribute (using the same - // name - // id!), thus avoiding that InsertFieldExtrations inserts a field extraction later. - // This means that an EsRelation[field1, field2, field3] where field1 and field 3 are constants, will be replaced by - // Project[field1, field2, field3] <- keeps the ordering intact - // \_Eval[field1 = value, field3 = value] - // \_EsRelation[field1, field2, field3] - List relationOutput = relation.output(); - List newProjections = new ArrayList<>(relationOutput.size()); - for (int i = 0, size = relationOutput.size(); i < size; i++) { - Attribute attr = relationOutput.get(i); - Alias alias = attrToValue.get(attr); - newProjections.add(alias == null ? attr : alias); - } - - Eval eval = new Eval(plan.source(), relation, new ArrayList<>(attrToValue.values())); - // This projection is redundant if there's another projection downstream (and no commands depend on the order until we hit it). - return new Project(plan.source(), eval, newProjections.stream().map(NamedExpression::toAttribute).toList()); - } - + private LogicalPlan replaceAttributes(LogicalPlan plan, Map attrToValue) { + // This is slightly different from ReplaceMissingFieldWithNull. + // It's on purpose: reusing NameIDs is dangerous, and we have no evidence that adding an EVAL will actually lead to + // practical performance benefits if (plan instanceof Eval || plan instanceof Filter || plan instanceof OrderBy || plan instanceof RegexExtract || plan instanceof TopN) { - return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> { - Alias alias = attrToValue.get(f); - return alias != null ? alias.child() : f; - }); + return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> attrToValue.getOrDefault(f, f)); } return plan; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 74d5d6ff41cb0..d013bd15a0e6a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -42,7 +42,6 @@ import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -60,6 +59,7 @@ import org.elasticsearch.xpack.esql.plan.logical.Enrich; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; +import org.elasticsearch.xpack.esql.plan.physical.DissectExec; import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec.Stat; @@ -1872,15 +1872,14 @@ public void testPushDownFieldExtractToTimeSeriesSource() { /** * LimitExec[1000[INTEGER]] - * \_ExchangeExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{f}#6, date{f}#7, date_nanos{f}#8, double{f}#9, - * float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, !semantic_text, - * short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21], false] - * \_ProjectExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{r}#6, date{f}#7, date_nanos{f}#8, double{f}#9, - * float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, - * !semantic_text, short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21]] - * \_FieldExtractExec[!alias_integer, boolean{f}#4, byte{f}#5, date{f}#7, ] - * \_EvalExec[[[66 6f 6f][KEYWORD] AS constant_keyword-foo]] - * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#23], limit[1000], sort[] estimatedRowSize[412] + * \_ExchangeExec[[!alias_integer, boolean{f}#415, byte{f}#416, constant_keyword-foo{f}#417, date{f}#418, date_nanos{f}#419, + * double{f}#420, float{f}#421, half_float{f}#422, integer{f}#424, ip{f}#425, keyword{f}#426, long{f}#427, scaled_float{f}#423, + * !semantic_text, short{f}#429, text{f}#430, unsigned_long{f}#428, version{f}#431, wildcard{f}#432], false] + * \_ProjectExec[[!alias_integer, boolean{f}#415, byte{f}#416, constant_keyword-foo{f}#417, date{f}#418, date_nanos{f}#419, + * double{f}#420, float{f}#421, half_float{f}#422, integer{f}#424, ip{f}#425, keyword{f}#426, long{f}#427, scaled_float{f}#423, + * !semantic_text, short{f}#429, text{f}#430, unsigned_long{f}#428, version{f}#431, wildcard{f}#432]] + * \_FieldExtractExec[!alias_integer, boolean{f}#415, byte{f}#416, consta..] + * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#434], limit[1000], sort[] estimatedRowSize[412] */ public void testConstantKeywordWithMatchingFilter() { String queryText = """ @@ -1894,11 +1893,9 @@ public void testConstantKeywordWithMatchingFilter() { var exchange = as(limit.child(), ExchangeExec.class); var project = as(exchange.child(), ProjectExec.class); var field = as(project.child(), FieldExtractExec.class); - var eval = as(field.child(), EvalExec.class); - var query = as(eval.child(), EsQueryExec.class); + var query = as(field.child(), EsQueryExec.class); assertThat(as(query.limit(), Literal.class).value(), is(1000)); assertNull(query.query()); - assertFalse(field.attributesToExtract().stream().map(NamedExpression::name).anyMatch(x -> x.equals("constant_keyword-foo"))); } /** @@ -1925,21 +1922,14 @@ public void testConstantKeywordWithNonMatchingFilter() { /** * LimitExec[1000[INTEGER]] - * \_ExchangeExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1 - * 1, float{f}#12, half_float{f}#13, integer{f}#15, ip{f}#16, keyword{f}#17, long{f}#18, scaled_float{f}#14, - * !semantic_text, short{f}#20, text{f}#21, unsigned_long{f}#19, version{f}#22, wildcard{f}#23], false] - * \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1 - * 1, float{f}#12, half_float{f}#13, integer{f}#15, ip{f}#16, keyword{f}#17, long{f}#18, scaled_float{f}#14, - * !semantic_text, short{f}#20, text{f}#21, unsigned_long{f}#19, version{f}#22, wildcard{f}#23]] - * \_LimitExec[1000[INTEGER]] - * \_FilterExec[constant_keyword-foo{r}#25 == [66 6f 6f][KEYWORD]] - * \_MvExpandExec[constant_keyword-foo{f}#8,constant_keyword-foo{r}#25] - * \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#8, date{f}#9, date_nanos{f}#10, - * double{f}#11, float{f}#12, half_float{f}#13, integer{f}#15, ip{f}#16, keyword{f}#17, long{f}#18, scaled_float{f}#14, - * !semantic_text, short{f}#20, text{f}#21, unsigned_long{f}#19, version{f}#22, wildcard{f}#23]] - * \_FieldExtractExec[!alias_integer, boolean{f}#6, byte{f}#7, date{f}#9, ..] - * \_EvalExec[[[66 6f 6f][KEYWORD] AS constant_keyword-foo]] - * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#26], limit[], sort[] estimatedRowSize[412] + * \_ExchangeExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1... + * \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1... + * \_FieldExtractExec[!alias_integer, boolean{f}#6, byte{f}#7, date{f}#9, + * \_LimitExec[1000[INTEGER]] + * \_FilterExec[constant_keyword-foo{r}#25 == [66 6f 6f][KEYWORD]] + * \_MvExpandExec[constant_keyword-foo{f}#8,constant_keyword-foo{r}#25] + * \_FieldExtractExec[constant_keyword-foo{f}#8] + * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#26], limit[], sort[] estimatedRowSize[412] */ public void testConstantKeywordExpandFilter() { String queryText = """ @@ -1953,15 +1943,38 @@ public void testConstantKeywordExpandFilter() { var limit = as(plan, LimitExec.class); var exchange = as(limit.child(), ExchangeExec.class); var project = as(exchange.child(), ProjectExec.class); - var limit2 = as(project.child(), LimitExec.class); + var fieldExtract = as(project.child(), FieldExtractExec.class); + var limit2 = as(fieldExtract.child(), LimitExec.class); var filter = as(limit2.child(), FilterExec.class); var expand = as(filter.child(), MvExpandExec.class); - var project2 = as(expand.child(), ProjectExec.class); - var field = as(project2.child(), FieldExtractExec.class); - var eval = as(field.child(), EvalExec.class); - var query = as(eval.child(), EsQueryExec.class); + var field = as(expand.child(), FieldExtractExec.class); // MV_EXPAND is not optimized yet (it doesn't accept literals) + as(field.child(), EsQueryExec.class); + } + + /** + * DissectExec[constant_keyword-foo{f}#8,Parser[pattern=%{bar}, appendSeparator=, ... + * \_LimitExec[1000[INTEGER]] + * \_ExchangeExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{f}#8, date{f}#9, date_nanos{f}#10, double{f}#11... + * \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{f}#8, date{f}#9, date_nanos{f}#10, double{f}#11... + * \_FieldExtractExec[!alias_integer, boolean{f}#6, byte{f}#7, constant_k..] + * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#25], limit[1000], sort[] estimatedRowSize[462] + */ + public void testConstantKeywordDissectFilter() { + String queryText = """ + from test + | dissect `constant_keyword-foo` "%{bar}" + | where `constant_keyword-foo` == "foo" + """; + var analyzer = makeAnalyzer("mapping-all-types.json"); + var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer); + + var dissect = as(plan, DissectExec.class); + var limit = as(dissect.child(), LimitExec.class); + var exchange = as(limit.child(), ExchangeExec.class); + var project = as(exchange.child(), ProjectExec.class); + var field = as(project.child(), FieldExtractExec.class); + var query = as(field.child(), EsQueryExec.class); assertNull(query.query()); - assertFalse(field.attributesToExtract().stream().map(NamedExpression::name).anyMatch(x -> x.equals("constant_keyword-foo"))); } private QueryBuilder wrapWithSingleQuery(String query, QueryBuilder inner, String fieldName, Source source) { From 0df9c68033eb9b700f5b1d77289e5777a011847c Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Fri, 2 May 2025 12:13:24 +0200 Subject: [PATCH 3/7] Update docs/changelog/127549.yaml --- docs/changelog/127549.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/127549.yaml diff --git a/docs/changelog/127549.yaml b/docs/changelog/127549.yaml new file mode 100644 index 0000000000000..5f24111d22689 --- /dev/null +++ b/docs/changelog/127549.yaml @@ -0,0 +1,5 @@ +pr: 127549 +summary: Add local optimizations for `constant_keyword` +area: ES|QL +type: enhancement +issues: [] From 7ed35464f73b9281b08e0129f9015556082841b8 Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Fri, 2 May 2025 14:28:19 +0200 Subject: [PATCH 4/7] Fix cases with mix of constant_keyword and other types --- .../xpack/esql/stats/SearchContextStats.java | 2 + .../rest-api-spec/test/esql/30_types.yml | 104 +++++++++++++++++- 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java index 016212884e3c9..8cb10d14bb578 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java @@ -338,6 +338,8 @@ public String constantValue(String name) { } else if (thisVal.equals(val) == false) { return null; } + } else { + return null; } } return val; diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml index 22077823197b4..b4765ef76aaf8 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml @@ -49,6 +49,27 @@ constant_keyword: - { "index": { } } - { "color": "blue" } + - do: + indices.create: + index: test_3 + body: + mappings: + properties: + kind: + type: keyword + color: + type: keyword + + - do: + bulk: + index: test_3 + refresh: true + body: + - { "index": { } } + - { "kind":"not a constant", "color": "pink" } + - { "index": { } } + - { "kind": "still no constant", "color": "pink" } + - do: allowed_warnings_regex: - "No limit defined, adding default limit of \\[.*\\]" @@ -121,6 +142,21 @@ constant_keyword: - length: {values: 1} - match: {values.0.0: wow such constant} + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test,test_2 | where kind == "wow such constant" | keep color, kind' + - match: {columns.0.name: color} + - match: {columns.0.type: keyword} + - match: { columns.1.name: kind } + - match: { columns.1.type: keyword } + - length: {values: 1} + - match: {values.0.0: red} + - match: {values.0.1: "wow such constant"} + + - do: allowed_warnings_regex: - "No limit defined, adding default limit of \\[.*\\]" @@ -135,20 +171,84 @@ constant_keyword: - match: {values.0.0: red} - match: {values.0.1: "wow such constant"} + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test,test_2 | where kind == "wow such constant" | keep color, kind' + - match: {columns.0.name: color} + - match: {columns.0.type: keyword} + - match: { columns.1.name: kind } + - match: { columns.1.type: keyword } + - length: {values: 1} + - match: {values.0.0: red} + - match: {values.0.1: "wow such constant"} + - do: allowed_warnings_regex: - "No limit defined, adding default limit of \\[.*\\]" esql.query: body: - query: 'from test* | where kind >= "a" | keep color, kind | sort color' + query: 'from test* | where kind == "wow such constant" | keep color, kind' - match: {columns.0.name: color} - match: {columns.0.type: keyword} - match: { columns.1.name: kind } - match: { columns.1.type: keyword } - - length: {values: 2} + - length: {values: 1} + - match: {values.0.0: red} + - match: {values.0.1: "wow such constant"} + + + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test* | where kind == "not a constant" | keep color, kind' + - match: {columns.0.name: color} + - match: {columns.0.type: keyword} + - match: { columns.1.name: kind } + - match: { columns.1.type: keyword } + - length: {values: 1} + - match: {values.0.0: pink} + - match: {values.0.1: "not a constant"} + + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test* | where kind >= "a" | keep color, kind | sort color, kind' + - match: {columns.0.name: color} + - match: {columns.0.type: keyword} + - match: { columns.1.name: kind } + - match: { columns.1.type: keyword } + - length: {values: 4} - match: {values.0.0: blue} - match: {values.0.1: "a different constant"} + - match: {values.1.0: pink} + - match: {values.1.1: "not a constant"} + - match: {values.2.0: pink} + - match: {values.2.1: "still no constant"} + - match: {values.3.0: red} + - match: {values.3.1: "wow such constant"} + + + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test* | where kind >= "o" | keep color, kind | sort color, kind' + - match: {columns.0.name: color} + - match: {columns.0.type: keyword} + - match: { columns.1.name: kind } + - match: { columns.1.type: keyword } + - length: {values: 2} + - match: {values.0.0: pink} + - match: {values.0.1: "still no constant"} - match: {values.1.0: red} - match: {values.1.1: "wow such constant"} From 23dc016b9168a62fa53cb7d3af6355b4fc725e01 Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Mon, 5 May 2025 12:58:40 +0200 Subject: [PATCH 5/7] More tests --- .../rest-api-spec/test/esql/30_types.yml | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml index b4765ef76aaf8..c00a6bc4e320c 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/30_types.yml @@ -70,6 +70,25 @@ constant_keyword: - { "index": { } } - { "kind": "still no constant", "color": "pink" } + - do: + indices.create: + index: text_test + body: + mappings: + properties: + kind: + type: text + color: + type: keyword + + - do: + bulk: + index: text_test + refresh: true + body: + - { "index": { } } + - { "kind":"a text field", "color": "green" } + - do: allowed_warnings_regex: - "No limit defined, adding default limit of \\[.*\\]" @@ -252,6 +271,54 @@ constant_keyword: - match: {values.1.0: red} - match: {values.1.1: "wow such constant"} + + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test,text_test | where kind::string == "a text field" | eval kind = kind::string | keep color, kind' + - match: {columns.0.name: color} + - match: {columns.0.type: keyword} + - match: { columns.1.name: kind } + - match: { columns.1.type: keyword } + - length: {values: 1} + - match: {values.0.0: green} + - match: {values.0.1: "a text field"} + + + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test,text_test | where kind::string == "wow such constant" | eval kind = kind::string | keep color, kind' + - match: {columns.0.name: color} + - match: {columns.0.type: keyword} + - match: { columns.1.name: kind } + - match: { columns.1.type: keyword } + - length: {values: 1} + - match: {values.0.0: red} + - match: {values.0.1: "wow such constant"} + + + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test,text_test | where kind::string >= "a" | eval kind = kind::string | keep color, kind | sort color' + - match: {columns.0.name: color} + - match: {columns.0.type: keyword} + - match: { columns.1.name: kind } + - match: { columns.1.type: keyword } + - length: {values: 2} + - match: {values.0.0: green} + - match: {values.0.1: "a text field"} + - match: {values.1.0: red} + - match: {values.1.1: "wow such constant"} + + --- constant_keyword with null value: - do: From 3a52b87a30ef6d07bb878538ad1dc2f22942bd77 Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Mon, 19 May 2025 10:53:30 +0200 Subject: [PATCH 6/7] Unify ReplaceMissingFieldWithNull and ReplaceConstantKeyword in a single rule --- .../optimizer/LocalLogicalPlanOptimizer.java | 6 +- .../local/ReplaceConstantKeywords.java | 67 ------------------- ...ull.java => ReplaceFieldWithConstant.java} | 28 ++++++-- 3 files changed, 25 insertions(+), 76 deletions(-) delete mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java rename x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/{ReplaceMissingFieldWithNull.java => ReplaceFieldWithConstant.java} (82%) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index 8bbf2814a024e..6a150da79cca3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -12,8 +12,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferIsNotNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferNonNullAggConstraint; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.LocalPropagateEmptyRelation; -import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceConstantKeywords; -import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceMissingFieldWithNull; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceFieldWithConstant; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceTopNWithLimitAndSort; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; @@ -39,9 +38,8 @@ public class LocalLogicalPlanOptimizer extends ParameterizedRuleExecutor( "Local rewrite", Limiter.ONCE, - new ReplaceConstantKeywords(), new ReplaceTopNWithLimitAndSort(), - new ReplaceMissingFieldWithNull(), + new ReplaceFieldWithConstant(), new InferIsNotNull(), new InferNonNullAggConstraint() ), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java deleted file mode 100644 index a37535542b7c4..0000000000000 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceConstantKeywords.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.esql.optimizer.rules.logical.local; - -import org.elasticsearch.index.IndexMode; -import org.elasticsearch.xpack.esql.core.expression.Attribute; -import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; -import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext; -import org.elasticsearch.xpack.esql.plan.logical.EsRelation; -import org.elasticsearch.xpack.esql.plan.logical.Eval; -import org.elasticsearch.xpack.esql.plan.logical.Filter; -import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; -import org.elasticsearch.xpack.esql.plan.logical.OrderBy; -import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; -import org.elasticsearch.xpack.esql.plan.logical.TopN; -import org.elasticsearch.xpack.esql.rule.ParameterizedRule; - -import java.util.HashMap; -import java.util.Map; - -/** - * Look for any constant_keyword fields used in the plan and replaces them with their actual value. - */ -public class ReplaceConstantKeywords extends ParameterizedRule { - - @Override - public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLogicalOptimizerContext) { - Map attrToValue = new HashMap<>(); - plan.forEachUp(EsRelation.class, esRelation -> { - if (esRelation.indexMode() == IndexMode.STANDARD) { - for (Attribute attribute : esRelation.output()) { - var val = localLogicalOptimizerContext.searchStats().constantValue(attribute.name()); - if (val != null) { - attrToValue.put(attribute, Literal.of(attribute, val)); - } - } - } - }); - if (attrToValue.isEmpty()) { - return plan; - } - return plan.transformUp(p -> replaceAttributes(p, attrToValue)); - } - - private LogicalPlan replaceAttributes(LogicalPlan plan, Map attrToValue) { - // This is slightly different from ReplaceMissingFieldWithNull. - // It's on purpose: reusing NameIDs is dangerous, and we have no evidence that adding an EVAL will actually lead to - // practical performance benefits - if (plan instanceof Eval - || plan instanceof Filter - || plan instanceof OrderBy - || plan instanceof RegexExtract - || plan instanceof TopN) { - return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> attrToValue.getOrDefault(f, f)); - } - - return plan; - } - -} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceFieldWithConstant.java similarity index 82% rename from x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceFieldWithConstant.java index 5a99781b2afde..f97ab4a43f78b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceFieldWithConstant.java @@ -12,6 +12,7 @@ import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; +import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; @@ -29,19 +30,21 @@ import org.elasticsearch.xpack.esql.rule.ParameterizedRule; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.Predicate; /** - * Look for any fields used in the plan that are missing locally and replace them with null. + * Look for any fields used in the plan that are missing or that are constant locally and replace them with null or with the value. * This should minimize the plan execution, in the best scenario skipping its execution all together. */ -public class ReplaceMissingFieldWithNull extends ParameterizedRule { +public class ReplaceFieldWithConstant extends ParameterizedRule { @Override public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLogicalOptimizerContext) { var lookupFieldsBuilder = AttributeSet.builder(); + Map attrToValue = new HashMap<>(); plan.forEachUp(EsRelation.class, esRelation -> { // Looking only for indices in LOOKUP mode is correct: during parsing, we assign the expected mode and even if a lookup index // is used in the FROM command, it will not be marked with LOOKUP mode there - but STANDARD. @@ -52,6 +55,18 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog if (esRelation.indexMode() == IndexMode.LOOKUP) { lookupFieldsBuilder.addAll(esRelation.output()); } + // find constant values only in the main indices + else if (esRelation.indexMode() == IndexMode.STANDARD) { + for (Attribute attribute : esRelation.output()) { + if (attribute instanceof FieldAttribute fa) { + // Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead. + var val = localLogicalOptimizerContext.searchStats().constantValue(fa.fieldName()); + if (val != null) { + attrToValue.put(attribute, Literal.of(attribute, val)); + } + } + } + } }); AttributeSet lookupFields = lookupFieldsBuilder.build(); @@ -61,10 +76,10 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog || localLogicalOptimizerContext.searchStats().exists(f.fieldName()) || lookupFields.contains(f); - return plan.transformUp(p -> missingToNull(p, shouldBeRetained)); + return plan.transformUp(p -> missingToNull(p, shouldBeRetained, attrToValue)); } - private LogicalPlan missingToNull(LogicalPlan plan, Predicate shouldBeRetained) { + private LogicalPlan missingToNull(LogicalPlan plan, Predicate shouldBeRetained, Map constants) { if (plan instanceof EsRelation relation) { // For any missing field, place an Eval right after the EsRelation to assign null values to that attribute (using the same name // id!), thus avoiding that InsertFieldExtrations inserts a field extraction later. @@ -118,7 +133,10 @@ private LogicalPlan missingToNull(LogicalPlan plan, Predicate sh || plan instanceof OrderBy || plan instanceof RegexExtract || plan instanceof TopN) { - return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> shouldBeRetained.test(f) ? f : Literal.of(f, null)); + return plan.transformExpressionsOnlyUp( + FieldAttribute.class, + f -> constants.getOrDefault(f, shouldBeRetained.test(f) ? f : Literal.of(f, null)) + ); } return plan; From f6a6176ff6f32cf6419edc4c0634841185ca37e8 Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Thu, 22 May 2025 09:49:14 +0200 Subject: [PATCH 7/7] Implement review suggestions --- .../optimizer/LocalLogicalPlanOptimizer.java | 4 +-- ...va => ReplaceFieldWithConstantOrNull.java} | 29 ++++++++++++------- 2 files changed, 20 insertions(+), 13 deletions(-) rename x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/{ReplaceFieldWithConstant.java => ReplaceFieldWithConstantOrNull.java} (85%) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index 6a150da79cca3..0c2ecdcb71fe7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -12,7 +12,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferIsNotNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferNonNullAggConstraint; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.LocalPropagateEmptyRelation; -import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceFieldWithConstant; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceFieldWithConstantOrNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceTopNWithLimitAndSort; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; @@ -39,7 +39,7 @@ public class LocalLogicalPlanOptimizer extends ParameterizedRuleExecutor { +public class ReplaceFieldWithConstantOrNull extends ParameterizedRule { @Override public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLogicalOptimizerContext) { var lookupFieldsBuilder = AttributeSet.builder(); - Map attrToValue = new HashMap<>(); + Map attrToConstant = new HashMap<>(); plan.forEachUp(EsRelation.class, esRelation -> { - // Looking only for indices in LOOKUP mode is correct: during parsing, we assign the expected mode and even if a lookup index + // Looking for indices in LOOKUP mode is correct: during parsing, we assign the expected mode and even if a lookup index // is used in the FROM command, it will not be marked with LOOKUP mode there - but STANDARD. // It seems like we could instead just look for JOINs and walk down their right hand side to find lookup fields - but this does // not work as this rule also gets called just on the right hand side of a JOIN, which means that we don't always know that @@ -62,7 +62,7 @@ else if (esRelation.indexMode() == IndexMode.STANDARD) { // Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead. var val = localLogicalOptimizerContext.searchStats().constantValue(fa.fieldName()); if (val != null) { - attrToValue.put(attribute, Literal.of(attribute, val)); + attrToConstant.put(attribute, Literal.of(attribute, val)); } } } @@ -76,10 +76,14 @@ else if (esRelation.indexMode() == IndexMode.STANDARD) { || localLogicalOptimizerContext.searchStats().exists(f.fieldName()) || lookupFields.contains(f); - return plan.transformUp(p -> missingToNull(p, shouldBeRetained, attrToValue)); + return plan.transformUp(p -> replaceWithNullOrConstant(p, shouldBeRetained, attrToConstant)); } - private LogicalPlan missingToNull(LogicalPlan plan, Predicate shouldBeRetained, Map constants) { + private LogicalPlan replaceWithNullOrConstant( + LogicalPlan plan, + Predicate shouldBeRetained, + Map attrToConstant + ) { if (plan instanceof EsRelation relation) { // For any missing field, place an Eval right after the EsRelation to assign null values to that attribute (using the same name // id!), thus avoiding that InsertFieldExtrations inserts a field extraction later. @@ -133,10 +137,13 @@ private LogicalPlan missingToNull(LogicalPlan plan, Predicate sh || plan instanceof OrderBy || plan instanceof RegexExtract || plan instanceof TopN) { - return plan.transformExpressionsOnlyUp( - FieldAttribute.class, - f -> constants.getOrDefault(f, shouldBeRetained.test(f) ? f : Literal.of(f, null)) - ); + return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> { + if (attrToConstant.containsKey(f)) {// handle constant values field and use the value itself instead + return attrToConstant.get(f); + } else {// handle missing fields and replace them with null + return shouldBeRetained.test(f) ? f : Literal.of(f, null); + } + }); } return plan;