diff --git a/docs/changelog/115624.yaml b/docs/changelog/115624.yaml new file mode 100644 index 0000000000000..1992ed65679ca --- /dev/null +++ b/docs/changelog/115624.yaml @@ -0,0 +1,7 @@ +pr: 115624 +summary: "ES|QL: fix LIMIT pushdown past MV_EXPAND" +area: ES|QL +type: bug +issues: + - 102084 + - 102061 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_expand.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_expand.csv-spec index 3a1ae3985e129..2a7c092798404 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_expand.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_expand.csv-spec @@ -324,3 +324,83 @@ from employees | where emp_no == 10001 | keep * | mv_expand first_name; avg_worked_seconds:long | birth_date:date | emp_no:integer | first_name:keyword | gender:keyword | height:double | height.float:double | height.half_float:double | height.scaled_float:double | hire_date:date | is_rehired:boolean | job_positions:keyword | languages:integer | languages.byte:integer | languages.long:long | languages.short:integer | last_name:keyword | salary:integer | salary_change:double | salary_change.int:integer | salary_change.keyword:keyword | salary_change.long:long | still_hired:boolean 268728049 | 1953-09-02T00:00:00.000Z | 10001 | Georgi | M | 2.03 | 2.0299999713897705 | 2.029296875 | 2.0300000000000002 | 1986-06-26T00:00:00.000Z | [false, true] | [Accountant, Senior Python Developer] | 2 | 2 | 2 | 2 | Facello | 57305 | 1.19 | 1 | 1.19 | 1 | true ; + + +// see https://github.com/elastic/elasticsearch/issues/102061 +sortMvExpand +required_capability: add_limit_inside_mv_expand +row a = 1 | sort a | mv_expand a; + +a:integer +1 +; + +// see https://github.com/elastic/elasticsearch/issues/102061 +sortMvExpandFromIndex +required_capability: add_limit_inside_mv_expand +from employees | sort emp_no | mv_expand emp_no | limit 1 | keep emp_no; + +emp_no:integer +10001 +; + + +// see https://github.com/elastic/elasticsearch/issues/102061 +limitSortMvExpand +required_capability: add_limit_inside_mv_expand +row a = 1 | limit 1 | sort a | mv_expand a; + +a:integer +1 +; + + +// see https://github.com/elastic/elasticsearch/issues/102061 +limitSortMultipleMvExpand +required_capability: add_limit_inside_mv_expand +row a = [1, 2, 3, 4, 5], b = 2, c = 3 | sort a | mv_expand a | mv_expand b | mv_expand c | limit 3; + +a:integer | b:integer | c:integer +1 | 2 | 3 +2 | 2 | 3 +3 | 2 | 3 +; + + +multipleLimitSortMultipleMvExpand +required_capability: add_limit_inside_mv_expand +row a = [1, 2, 3, 4, 5], b = 2, c = 3 | sort a | mv_expand a | limit 2 | mv_expand b | mv_expand c | limit 3; + +a:integer | b:integer | c:integer +1 | 2 | 3 +2 | 2 | 3 +; + + +multipleLimitSortMultipleMvExpand2 +required_capability: add_limit_inside_mv_expand +row a = [1, 2, 3, 4, 5], b = 2, c = 3 | sort a | mv_expand a | limit 3 | mv_expand b | mv_expand c | limit 2; + +a:integer | b:integer | c:integer +1 | 2 | 3 +2 | 2 | 3 +; + + +//see https://github.com/elastic/elasticsearch/issues/102084 +whereMvExpand +required_capability: add_limit_inside_mv_expand +row a = 1, b = -15 | where b > 3 | mv_expand b; + +a:integer | b:integer +; + + +//see https://github.com/elastic/elasticsearch/issues/102084 +whereMvExpandOnIndex +required_capability: add_limit_inside_mv_expand +from employees | where emp_no == 10003 | mv_expand first_name | keep first_name; + +first_name:keyword +Parto +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 196a864db2c15..6439df6ee71ee 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -441,7 +441,12 @@ public enum Cap { /** * Support simplified syntax for named parameters for field and function names. */ - NAMED_PARAMETER_FOR_FIELD_AND_FUNCTION_NAMES_SIMPLIFIED_SYNTAX(Build.current().isSnapshot()); + NAMED_PARAMETER_FOR_FIELD_AND_FUNCTION_NAMES_SIMPLIFIED_SYNTAX(Build.current().isSnapshot()), + + /** + * Fix pushdown of LIMIT past MV_EXPAND + */ + ADD_LIMIT_INSIDE_MV_EXPAND; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index b18f58b0a43cb..4768af4bc8edb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -507,7 +507,8 @@ private LogicalPlan resolveMvExpand(MvExpand p, List childrenOutput) resolved, resolved.resolved() ? new ReferenceAttribute(resolved.source(), resolved.name(), resolved.dataType(), resolved.nullable(), null, false) - : resolved + : resolved, + p.limit() ); } return p; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index a1da269f896da..fb3a1b5179beb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -19,7 +19,6 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.CombineProjections; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ConstantFolding; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ConvertStringToByteRef; -import org.elasticsearch.xpack.esql.optimizer.rules.logical.DuplicateLimitAfterMvExpand; import org.elasticsearch.xpack.esql.optimizer.rules.logical.FoldNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.LiteralsOnTheRight; import org.elasticsearch.xpack.esql.optimizer.rules.logical.PartiallyFoldCase; @@ -174,7 +173,6 @@ protected static Batch operators() { new PruneColumns(), new PruneLiteralsInOrderBy(), new PushDownAndCombineLimits(), - new DuplicateLimitAfterMvExpand(), new PushDownAndCombineFilters(), new PushDownEval(), new PushDownRegexExtract(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DuplicateLimitAfterMvExpand.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DuplicateLimitAfterMvExpand.java deleted file mode 100644 index 8985f4ab24705..0000000000000 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DuplicateLimitAfterMvExpand.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.esql.optimizer.rules.logical; - -import org.elasticsearch.xpack.esql.core.expression.AttributeSet; -import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; -import org.elasticsearch.xpack.esql.plan.logical.Aggregate; -import org.elasticsearch.xpack.esql.plan.logical.Enrich; -import org.elasticsearch.xpack.esql.plan.logical.Eval; -import org.elasticsearch.xpack.esql.plan.logical.Filter; -import org.elasticsearch.xpack.esql.plan.logical.Limit; -import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; -import org.elasticsearch.xpack.esql.plan.logical.MvExpand; -import org.elasticsearch.xpack.esql.plan.logical.OrderBy; -import org.elasticsearch.xpack.esql.plan.logical.Project; -import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; -import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; - -public final class DuplicateLimitAfterMvExpand extends OptimizerRules.OptimizerRule { - - @Override - protected LogicalPlan rule(Limit limit) { - var child = limit.child(); - var shouldSkip = child instanceof Eval - || child instanceof Project - || child instanceof RegexExtract - || child instanceof Enrich - || child instanceof Limit; - - if (shouldSkip == false && child instanceof UnaryPlan unary) { - MvExpand mvExpand = descendantMvExpand(unary); - if (mvExpand != null) { - Limit limitBeforeMvExpand = limitBeforeMvExpand(mvExpand); - // if there is no "appropriate" limit before mv_expand, then push down a copy of the one after it so that: - // - a possible TopN is properly built as low as possible in the tree (closed to Lucene) - // - the input of mv_expand is as small as possible before it is expanded (less rows to inflate and occupy memory) - if (limitBeforeMvExpand == null) { - var duplicateLimit = new Limit(limit.source(), limit.limit(), mvExpand.child()); - return limit.replaceChild(propagateDuplicateLimitUntilMvExpand(duplicateLimit, mvExpand, unary)); - } - } - } - return limit; - } - - private static MvExpand descendantMvExpand(UnaryPlan unary) { - UnaryPlan plan = unary; - AttributeSet filterReferences = new AttributeSet(); - while (plan instanceof Aggregate == false) { - if (plan instanceof MvExpand mve) { - // don't return the mv_expand that has a filter after it which uses the expanded values - // since this will trigger the use of a potentially incorrect (too restrictive) limit further down in the tree - if (filterReferences.isEmpty() == false) { - if (filterReferences.contains(mve.target()) // the same field or reference attribute is used in mv_expand AND filter - || mve.target() instanceof ReferenceAttribute // or the mv_expand attr hasn't yet been resolved to a field attr - // or not all filter references have been resolved to field attributes - || filterReferences.stream().anyMatch(ref -> ref instanceof ReferenceAttribute)) { - return null; - } - } - return mve; - } else if (plan instanceof Filter filter) { - // gather all the filters' references to be checked later when a mv_expand is found - filterReferences.addAll(filter.references()); - } else if (plan instanceof OrderBy) { - // ordering after mv_expand COULD break the order of the results, so the limit shouldn't be copied past mv_expand - // something like from test | sort emp_no | mv_expand job_positions | sort first_name | limit 5 - // (the sort first_name likely changes the order of the docs after sort emp_no, so "limit 5" shouldn't be copied down - return null; - } - - if (plan.child() instanceof UnaryPlan unaryPlan) { - plan = unaryPlan; - } else { - break; - } - } - return null; - } - - private static Limit limitBeforeMvExpand(MvExpand mvExpand) { - UnaryPlan plan = mvExpand; - while (plan instanceof Aggregate == false) { - if (plan instanceof Limit limit) { - return limit; - } - if (plan.child() instanceof UnaryPlan unaryPlan) { - plan = unaryPlan; - } else { - break; - } - } - return null; - } - - private LogicalPlan propagateDuplicateLimitUntilMvExpand(Limit duplicateLimit, MvExpand mvExpand, UnaryPlan child) { - if (child == mvExpand) { - return mvExpand.replaceChild(duplicateLimit); - } else { - return child.replaceChild(propagateDuplicateLimitUntilMvExpand(duplicateLimit, mvExpand, (UnaryPlan) child.child())); - } - } -} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineLimits.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineLimits.java index 08f32b094a95a..153efa5b5c233 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineLimits.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownAndCombineLimits.java @@ -33,6 +33,21 @@ public LogicalPlan rule(Limit limit) { } else if (limit.child() instanceof UnaryPlan unary) { if (unary instanceof Eval || unary instanceof Project || unary instanceof RegexExtract || unary instanceof Enrich) { return unary.replaceChild(limit.replaceChild(unary.child())); + } else if (unary instanceof MvExpand mvx) { + // MV_EXPAND can increase the number of rows, so we cannot just push the limit down + // (we also have to preserve the LIMIT afterwards) + // + // To avoid infinite loops, ie. + // | MV_EXPAND | LIMIT -> | LIMIT | MV_EXPAND | LIMIT -> ... | MV_EXPAND | LIMIT + // we add an inner limit to MvExpand and just push down the existing limit, ie. + // | MV_EXPAND | LIMIT N -> | LIMIT N | MV_EXPAND (with limit N) + var limitSource = limit.limit(); + var limitVal = (int) limitSource.fold(); + Integer mvxLimit = mvx.limit(); + if (mvxLimit == null || mvxLimit > limitVal) { + mvx = new MvExpand(mvx.source(), mvx.child(), mvx.target(), mvx.expanded(), limitVal); + } + return mvx.replaceChild(limit.replaceChild(mvx.child())); } // check if there's a 'visible' descendant limit lower than the current one // and if so, align the current limit since it adds no value diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/MvExpand.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/MvExpand.java index 46ebc43d698a6..949e4906e5033 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/MvExpand.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/MvExpand.java @@ -27,13 +27,19 @@ public class MvExpand extends UnaryPlan { private final NamedExpression target; private final Attribute expanded; + private final Integer limit; private List output; public MvExpand(Source source, LogicalPlan child, NamedExpression target, Attribute expanded) { + this(source, child, target, expanded, null); + } + + public MvExpand(Source source, LogicalPlan child, NamedExpression target, Attribute expanded, Integer limit) { super(source, child); this.target = target; this.expanded = expanded; + this.limit = limit; } private MvExpand(StreamInput in) throws IOException { @@ -41,7 +47,8 @@ private MvExpand(StreamInput in) throws IOException { Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(LogicalPlan.class), in.readNamedWriteable(NamedExpression.class), - in.readNamedWriteable(Attribute.class) + in.readNamedWriteable(Attribute.class), + null // we only need this on the coordinator ); } @@ -51,6 +58,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeNamedWriteable(child()); out.writeNamedWriteable(target()); out.writeNamedWriteable(expanded()); + assert limit == null; } @Override @@ -78,6 +86,10 @@ public Attribute expanded() { return expanded; } + public Integer limit() { + return limit; + } + @Override protected AttributeSet computeReferences() { return target.references(); @@ -94,7 +106,7 @@ public boolean expressionsResolved() { @Override public UnaryPlan replaceChild(LogicalPlan newChild) { - return new MvExpand(source(), newChild, target, expanded); + return new MvExpand(source(), newChild, target, expanded, limit); } @Override @@ -107,12 +119,12 @@ public List output() { @Override protected NodeInfo info() { - return NodeInfo.create(this, MvExpand::new, child(), target, expanded); + return NodeInfo.create(this, MvExpand::new, child(), target, expanded, limit); } @Override public int hashCode() { - return Objects.hash(super.hashCode(), target, expanded); + return Objects.hash(super.hashCode(), target, expanded, limit); } @Override @@ -120,6 +132,7 @@ public boolean equals(Object obj) { if (false == super.equals(obj)) { return false; } - return Objects.equals(target, ((MvExpand) obj).target) && Objects.equals(expanded, ((MvExpand) obj).expanded); + MvExpand other = ((MvExpand) obj); + return Objects.equals(target, other.target) && Objects.equals(expanded, other.expanded) && Objects.equals(limit, other.limit); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Mapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Mapper.java index 152c492a34433..a8f820c8ef3fd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Mapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Mapper.java @@ -11,6 +11,9 @@ import org.elasticsearch.compute.aggregation.AggregatorMode; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.BinaryPlan; @@ -228,7 +231,13 @@ private PhysicalPlan map(UnaryPlan p, PhysicalPlan child) { } if (p instanceof MvExpand mvExpand) { - return new MvExpandExec(mvExpand.source(), map(mvExpand.child()), mvExpand.target(), mvExpand.expanded()); + MvExpandExec result = new MvExpandExec(mvExpand.source(), map(mvExpand.child()), mvExpand.target(), mvExpand.expanded()); + if (mvExpand.limit() != null) { + // MvExpand could have an inner limit + // see PushDownAndCombineLimits rule + return new LimitExec(result.source(), result, new Literal(Source.EMPTY, mvExpand.limit(), DataType.INTEGER)); + } + return result; } // diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java index e556d43a471c3..baef20081a4f2 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java @@ -73,6 +73,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY; import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; @@ -193,11 +194,10 @@ public void testMissingFieldInSort() { /** * Expects - * EsqlProject[[first_name{f}#6]] - * \_Limit[1000[INTEGER]] - * \_MvExpand[last_name{f}#9,last_name{r}#15] - * \_Limit[1000[INTEGER]] - * \_EsRelation[test][_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, ge..] + * EsqlProject[[first_name{f}#9, last_name{r}#18]] + * \_MvExpand[last_name{f}#12,last_name{r}#18,1000] + * \_Limit[1000[INTEGER]] + * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] */ public void testMissingFieldInMvExpand() { var plan = plan(""" @@ -213,11 +213,8 @@ public void testMissingFieldInMvExpand() { var projections = project.projections(); assertThat(Expressions.names(projections), contains("first_name", "last_name")); - var limit = as(project.child(), Limit.class); - // MvExpand cannot be optimized (yet) because the target NamedExpression cannot be replaced with a NULL literal - // https://github.com/elastic/elasticsearch/issues/109974 - // See LocalLogicalPlanOptimizer.ReplaceMissingFieldWithNull - var mvExpand = as(limit.child(), MvExpand.class); + var mvExpand = as(project.child(), MvExpand.class); + assertThat(mvExpand.limit(), equalTo(1000)); var limit2 = as(mvExpand.child(), Limit.class); as(limit2.child(), EsRelation.class); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index ff7675504d6ff..59ba8352d2aaf 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -1239,11 +1239,10 @@ public void testDontCombineOrderByThroughMvExpand() { /** * Expected - * Limit[1000[INTEGER]] - * \_MvExpand[x{r}#159] - * \_EsqlProject[[first_name{f}#162 AS x]] - * \_Limit[1000[INTEGER]] - * \_EsRelation[test][first_name{f}#162] + * MvExpand[x{r}#4,x{r}#18,1000] + * \_EsqlProject[[first_name{f}#9 AS x]] + * \_Limit[1000[INTEGER]] + * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] */ public void testCopyDefaultLimitPastMvExpand() { LogicalPlan plan = optimizedPlan(""" @@ -1253,21 +1252,20 @@ public void testCopyDefaultLimitPastMvExpand() { | mv_expand x """); - var limit = as(plan, Limit.class); - var mvExpand = as(limit.child(), MvExpand.class); + var mvExpand = as(plan, MvExpand.class); + assertThat(mvExpand.limit(), equalTo(1000)); var keep = as(mvExpand.child(), EsqlProject.class); var limitPastMvExpand = as(keep.child(), Limit.class); - assertThat(limitPastMvExpand.limit(), equalTo(limit.limit())); + assertThat(limitPastMvExpand.limit().fold(), equalTo(1000)); as(limitPastMvExpand.child(), EsRelation.class); } /** * Expected - * Limit[10[INTEGER]] - * \_MvExpand[first_name{f}#155] - * \_EsqlProject[[first_name{f}#155, last_name{f}#156]] - * \_Limit[1[INTEGER]] - * \_EsRelation[test][first_name{f}#155, last_name{f}#156] + * MvExpand[first_name{f}#7,first_name{r}#16,10] + * \_EsqlProject[[first_name{f}#7, last_name{f}#10]] + * \_Limit[1[INTEGER]] + * \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..] */ public void testDontPushDownLimitPastMvExpand() { LogicalPlan plan = optimizedPlan(""" @@ -1277,28 +1275,26 @@ public void testDontPushDownLimitPastMvExpand() { | mv_expand first_name | limit 10"""); - var limit = as(plan, Limit.class); - assertThat(limit.limit().fold(), equalTo(10)); - var mvExpand = as(limit.child(), MvExpand.class); + var mvExpand = as(plan, MvExpand.class); + assertThat(mvExpand.limit(), equalTo(10)); var project = as(mvExpand.child(), EsqlProject.class); - limit = as(project.child(), Limit.class); + var limit = as(project.child(), Limit.class); assertThat(limit.limit().fold(), equalTo(1)); as(limit.child(), EsRelation.class); } /** * Expected - * EsqlProject[[emp_no{f}#141, first_name{f}#142, languages{f}#143, lll{r}#132, salary{f}#147]] - * \_TopN[[Order[salary{f}#147,DESC,FIRST], Order[first_name{f}#142,ASC,LAST]],5[INTEGER]] - * \_Limit[5[INTEGER]] - * \_MvExpand[salary{f}#147] - * \_Eval[[languages{f}#143 + 5[INTEGER] AS lll]] - * \_Filter[languages{f}#143 > 1[INTEGER]] - * \_Limit[10[INTEGER]] - * \_MvExpand[first_name{f}#142] - * \_TopN[[Order[emp_no{f}#141,DESC,FIRST]],10[INTEGER]] - * \_Filter[emp_no{f}#141 < 10006[INTEGER]] - * \_EsRelation[test][emp_no{f}#141, first_name{f}#142, languages{f}#1..] + * EsqlProject[[emp_no{f}#19, first_name{r}#29, languages{f}#22, lll{r}#9, salary{r}#30]] + * \_TopN[[Order[salary{r}#30,DESC,FIRST]],5[INTEGER]] + * \_MvExpand[salary{f}#24,salary{r}#30,5] + * \_Eval[[languages{f}#22 + 5[INTEGER] AS lll]] + * \_Limit[5[INTEGER]] + * \_Filter[languages{f}#22 > 1[INTEGER]] + * \_MvExpand[first_name{f}#20,first_name{r}#29,10] + * \_TopN[[Order[emp_no{f}#19,DESC,FIRST]],10[INTEGER]] + * \_Filter[emp_no{f}#19 ≤ 10006[INTEGER]] + * \_EsRelation[test][_meta_field{f}#25, emp_no{f}#19, first_name{f}#20, ..] */ public void testMultipleMvExpandWithSortAndLimit() { LogicalPlan plan = optimizedPlan(""" @@ -1319,14 +1315,13 @@ public void testMultipleMvExpandWithSortAndLimit() { var topN = as(keep.child(), TopN.class); assertThat(topN.limit().fold(), equalTo(5)); assertThat(orderNames(topN), contains("salary")); - var limit = as(topN.child(), Limit.class); - assertThat(limit.limit().fold(), equalTo(5)); - var mvExp = as(limit.child(), MvExpand.class); + var mvExp = as(topN.child(), MvExpand.class); + assertThat(mvExp.limit(), equalTo(5)); var eval = as(mvExp.child(), Eval.class); - var filter = as(eval.child(), Filter.class); - limit = as(filter.child(), Limit.class); - assertThat(limit.limit().fold(), equalTo(10)); - mvExp = as(limit.child(), MvExpand.class); + var limit5 = as(eval.child(), Limit.class); + var filter = as(limit5.child(), Filter.class); + mvExp = as(filter.child(), MvExpand.class); + assertThat(mvExp.limit(), equalTo(10)); topN = as(mvExp.child(), TopN.class); assertThat(topN.limit().fold(), equalTo(10)); filter = as(topN.child(), Filter.class); @@ -1434,10 +1429,9 @@ public void testDontPushDownLimitPastAggregate_AndMvExpand() { * Limit[5[INTEGER]] * \_Filter[ISNOTNULL(first_name{r}#22)] * \_Aggregate[STANDARD,[first_name{r}#22],[MAX(salary{f}#17,true[BOOLEAN]) AS max_s, first_name{r}#22]] - * \_Limit[50[INTEGER]] - * \_MvExpand[first_name{f}#13,first_name{r}#22] - * \_Limit[50[INTEGER]] - * \_EsRelation[test][_meta_field{f}#18, emp_no{f}#12, first_name{f}#13, ..] + * \_MvExpand[first_name{f}#13,first_name{r}#22,50] + * \_Limit[50[INTEGER]] + * \_EsRelation[test][_meta_field{f}#18, emp_no{f}#12, first_name{f}#13, ..] */ public void testPushDown_TheRightLimit_PastMvExpand() { LogicalPlan plan = optimizedPlan(""" @@ -1453,9 +1447,8 @@ public void testPushDown_TheRightLimit_PastMvExpand() { assertThat(limit.limit().fold(), equalTo(5)); var filter = as(limit.child(), Filter.class); var agg = as(filter.child(), Aggregate.class); - limit = as(agg.child(), Limit.class); - assertThat(limit.limit().fold(), equalTo(50)); - var mvExp = as(limit.child(), MvExpand.class); + var mvExp = as(agg.child(), MvExpand.class); + assertThat(mvExp.limit(), equalTo(50)); limit = as(mvExp.child(), Limit.class); assertThat(limit.limit().fold(), equalTo(50)); as(limit.child(), EsRelation.class); @@ -1492,6 +1485,143 @@ public void testPushDownLimit_PastEvalAndMvExpand() { as(topN.child(), EsRelation.class); } + /** + * Expected + * EsqlProject[[emp_no{f}#12, first_name{r}#22, salary{f}#17]] + * \_TopN[[Order[salary{f}#17,ASC,LAST], Order[first_name{r}#22,ASC,LAST]],1000[INTEGER]] + * \_Filter[gender{f}#14 == [46][KEYWORD] AND WILDCARDLIKE(first_name{r}#22)] + * \_MvExpand[first_name{f}#13,first_name{r}#22,null] + * \_TopN[[Order[emp_no{f}#12,ASC,LAST]],10000[INTEGER]] + * \_EsRelation[test][_meta_field{f}#18, emp_no{f}#12, first_name{f}#13, ..] + */ + public void testAddDefaultLimit_BeforeMvExpand_WithFilterOnExpandedField_ResultTruncationDefaultSize() { + LogicalPlan plan = optimizedPlan(""" + from test + | sort emp_no + | mv_expand first_name + | where gender == "F" + | where first_name LIKE "R*" + | keep emp_no, first_name, salary + | sort salary, first_name"""); + + var keep = as(plan, EsqlProject.class); + var topN = as(keep.child(), TopN.class); + assertThat(topN.limit().fold(), equalTo(1000)); + assertThat(orderNames(topN), contains("salary", "first_name")); + var filter = as(topN.child(), Filter.class); + assertThat(filter.condition(), instanceOf(And.class)); + var mvExp = as(filter.child(), MvExpand.class); + topN = as(mvExp.child(), TopN.class); // TODO is it correct? Double-check AddDefaultTopN rule + assertThat(orderNames(topN), contains("emp_no")); + as(topN.child(), EsRelation.class); + } + + /** + * Expected + * + * MvExpand[first_name{f}#7,first_name{r}#16,10] + * \_TopN[[Order[emp_no{f}#6,DESC,FIRST]],10[INTEGER]] + * \_Filter[emp_no{f}#6 ≤ 10006[INTEGER]] + * \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..] + */ + public void testFilterWithSortBeforeMvExpand() { + LogicalPlan plan = optimizedPlan(""" + from test + | where emp_no <= 10006 + | sort emp_no desc + | mv_expand first_name + | limit 10"""); + + var mvExp = as(plan, MvExpand.class); + assertThat(mvExp.limit(), equalTo(10)); + var topN = as(mvExp.child(), TopN.class); + assertThat(topN.limit().fold(), equalTo(10)); + assertThat(orderNames(topN), contains("emp_no")); + var filter = as(topN.child(), Filter.class); + as(filter.child(), EsRelation.class); + } + + /** + * Expected + * + * TopN[[Order[first_name{f}#10,ASC,LAST]],500[INTEGER]] + * \_MvExpand[last_name{f}#13,last_name{r}#20,null] + * \_Filter[emp_no{r}#19 > 10050[INTEGER]] + * \_MvExpand[emp_no{f}#9,emp_no{r}#19,null] + * \_EsRelation[test][_meta_field{f}#15, emp_no{f}#9, first_name{f}#10, g..] + */ + public void testMultiMvExpand_SortDownBelow() { + LogicalPlan plan = optimizedPlan(""" + from test + | sort last_name ASC + | mv_expand emp_no + | where emp_no > 10050 + | mv_expand last_name + | sort first_name"""); + + var topN = as(plan, TopN.class); + assertThat(topN.limit().fold(), equalTo(1000)); + assertThat(orderNames(topN), contains("first_name")); + var mvExpand = as(topN.child(), MvExpand.class); + var filter = as(mvExpand.child(), Filter.class); + mvExpand = as(filter.child(), MvExpand.class); + var topN2 = as(mvExpand.child(), TopN.class); // TODO is it correct? Double-check AddDefaultTopN rule + as(topN2.child(), EsRelation.class); + } + + /** + * Expected + * + * MvExpand[c{r}#7,c{r}#16,10000] + * \_EsqlProject[[c{r}#7, a{r}#3]] + * \_TopN[[Order[a{r}#3,ASC,FIRST]],7300[INTEGER]] + * \_MvExpand[b{r}#5,b{r}#15,7300] + * \_Limit[7300[INTEGER]] + * \_Row[[null[NULL] AS a, 123[INTEGER] AS b, 234[INTEGER] AS c]] + */ + public void testLimitThenSortBeforeMvExpand() { + LogicalPlan plan = optimizedPlan(""" + row a = null, b = 123, c = 234 + | mv_expand b + | limit 7300 + | keep c, a + | sort a NULLS FIRST + | mv_expand c"""); + + var mvExpand = as(plan, MvExpand.class); + assertThat(mvExpand.limit(), equalTo(10000)); + var project = as(mvExpand.child(), EsqlProject.class); + var topN = as(project.child(), TopN.class); + assertThat(topN.limit().fold(), equalTo(7300)); + assertThat(orderNames(topN), contains("a")); + mvExpand = as(topN.child(), MvExpand.class); + var limit = as(mvExpand.child(), Limit.class); + assertThat(limit.limit().fold(), equalTo(7300)); + as(limit.child(), Row.class); + } + + /** + * Expected + * TopN[[Order[first_name{r}#16,ASC,LAST]],10000[INTEGER]] + * \_MvExpand[first_name{f}#7,first_name{r}#16] + * \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..] + */ + public void testRemoveUnusedSortBeforeMvExpand_DefaultLimit10000() { + LogicalPlan plan = optimizedPlan(""" + from test + | sort emp_no + | mv_expand first_name + | sort first_name + | limit 15000"""); + + var topN = as(plan, TopN.class); + assertThat(orderNames(topN), contains("first_name")); + assertThat(topN.limit().fold(), equalTo(10000)); + var mvExpand = as(topN.child(), MvExpand.class); + var topN2 = as(mvExpand.child(), TopN.class); // TODO is it correct? Double-check AddDefaultTopN rule + as(topN2.child(), EsRelation.class); + } + /** * Expected * EsqlProject[[emp_no{f}#104, first_name{f}#105, salary{f}#106]] @@ -1597,6 +1727,65 @@ public void testAddDefaultLimit_BeforeMvExpand_WithFilterOnExpandedFieldAlias() as(topN.child(), EsRelation.class); } + /** + * Expected: + * MvExpand[a{r}#1402,a{r}#1406,1000] + * \_TopN[[Order[a{r}#1402,ASC,LAST]],1000[INTEGER]] + * \_Row[[1[INTEGER] AS a]] + */ + public void testSortMvExpand() { + LogicalPlan plan = optimizedPlan(""" + row a = 1 + | sort a + | mv_expand a"""); + + var expand = as(plan, MvExpand.class); + assertThat(expand.limit(), equalTo(1000)); + var topN = as(expand.child(), TopN.class); + var row = as(topN.child(), Row.class); + } + + /** + * Expected: + * MvExpand[emp_no{f}#5,emp_no{r}#15,20] + * \_TopN[[Order[emp_no{f}#5,ASC,LAST]],20[INTEGER]] + * \_EsRelation[test][_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, ge..] + */ + public void testSortMvExpandLimit() { + LogicalPlan plan = optimizedPlan(""" + from test + | sort emp_no + | mv_expand emp_no + | limit 20"""); + + var expand = as(plan, MvExpand.class); + assertThat(expand.limit(), equalTo(20)); + var topN = as(expand.child(), TopN.class); + assertThat(topN.limit().fold(), is(20)); + var row = as(topN.child(), EsRelation.class); + } + + /** + * Expected: + * MvExpand[b{r}#5,b{r}#9,1000] + * \_Limit[1000[INTEGER]] + * \_Row[[1[INTEGER] AS a, -15[INTEGER] AS b]] + * + * see https://github.com/elastic/elasticsearch/issues/102084 + */ + public void testWhereMvExpand() { + LogicalPlan plan = optimizedPlan(""" + row a = 1, b = -15 + | where b < 3 + | mv_expand b"""); + + var expand = as(plan, MvExpand.class); + assertThat(expand.limit(), equalTo(1000)); + var limit2 = as(expand.child(), Limit.class); + assertThat(limit2.limit().fold(), is(1000)); + var row = as(limit2.child(), Row.class); + } + private static List orderNames(TopN topN) { return topN.order().stream().map(o -> as(o.child(), NamedExpression.class).name()).toList(); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index 8019dbf77ffbf..97de0caa93b5c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -1677,7 +1677,8 @@ public void testParamForIdentifier() { List.of(new Order(EMPTY, attribute("f.11..f.12.*"), Order.OrderDirection.ASC, Order.NullsPosition.LAST)) ), attribute("f.*.13.f.14*"), - attribute("f.*.13.f.14*") + attribute("f.*.13.f.14*"), + null ), statement( """