Skip to content
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ca954c4
Push down MvExpand past Project
kanoshiou Oct 10, 2025
55c37c2
Update docs/changelog/136398.yaml
kanoshiou Oct 10, 2025
91f86d6
Merge remote-tracking branch 'origin/main' into push-down-mv_expand-p…
kanoshiou Oct 24, 2025
a8b3897
Insert an eval if target in mv_expand is aliased in child project
kanoshiou Oct 26, 2025
83aa387
Add tests for #136596
kanoshiou Oct 26, 2025
2095c44
Update docs/changelog/136398.yaml
kanoshiou Oct 26, 2025
8d64398
Merge branch 'refs/heads/main' into push-down-mv_expand-past-project
kanoshiou Oct 26, 2025
a46cc7d
Merge branch 'refs/heads/main' into push-down-mv_expand-past-project
kanoshiou Nov 5, 2025
471124c
Merge branch 'refs/heads/main' into push-down-mv_expand-past-project
kanoshiou Nov 9, 2025
d08a6ea
refactor(esql): Improve MvExpand push-down logic in logical optimizer
kanoshiou Nov 9, 2025
075e826
refactor(esql): Improve MvExpand push-down logic in logical optimizer
kanoshiou Nov 9, 2025
6018ce0
Merge branch 'refs/heads/main' into push-down-mv_expand-past-project
kanoshiou Nov 9, 2025
3afecaa
refactor(esql): Improve MvExpand push-down logic in logical optimizer
kanoshiou Nov 10, 2025
2b3d55a
refactor(esql): Improve MvExpand push-down logic in logical optimizer
kanoshiou Nov 10, 2025
8e69eb0
Merge branch 'main' into push-down-mv_expand-past-project
kanoshiou Nov 10, 2025
faea78b
refactor(esql): Improve MvExpand push-down logic in PushDownMvExpandP…
kanoshiou Nov 10, 2025
7f8faf6
Merge branch 'refs/heads/main' into push-down-mv_expand-past-project
kanoshiou Nov 25, 2025
92dacb5
fix: refine `PushDownMvExpandPastProject` logic by correcting the ali…
kanoshiou Nov 28, 2025
eb3f373
Merge branch 'refs/heads/main' into push-down-mv_expand-past-project
kanoshiou Nov 28, 2025
adab9f8
fix(esql): simplify MvExpand pushdown logic by extracting to utility …
kanoshiou Nov 29, 2025
dd86f32
Update test
kanoshiou Nov 29, 2025
b042096
fix(esql): refactor MvExpand pushdown logic to directly manipulate pr…
kanoshiou Nov 29, 2025
d8c221a
Avoid inconsistent plans
kanoshiou Nov 29, 2025
b5ede88
Create a temporary attribute for the UnionAll case
kanoshiou Nov 30, 2025
cd844d0
Add more tests for UnionAll cases
kanoshiou Nov 30, 2025
2c45495
Merge branch 'refs/heads/main' into push-down-mv_expand-past-project
kanoshiou Nov 30, 2025
9410846
Merge branch 'refs/heads/main' into push-down-mv_expand-past-project
kanoshiou Dec 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/changelog/136398.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
pr: 136398
summary: "ESQL: Push down `MvExpand` past `Project`"
area: ES|QL
type: enhancement
issues:
- 136292
- 136596
- 119074
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase implements Query
"Data too large", // Circuit breaker exceptions eg. https://github.com/elastic/elasticsearch/issues/130072
"long overflow", // https://github.com/elastic/elasticsearch/issues/135759
"cannot be cast to class", // https://github.com/elastic/elasticsearch/issues/133992
"can't find input for", // https://github.com/elastic/elasticsearch/issues/136596
"unexpected byte", // https://github.com/elastic/elasticsearch/issues/136598
"out of bounds for length", // https://github.com/elastic/elasticsearch/issues/136851
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/138231
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -481,3 +481,68 @@ Production | false | true | -1 | 1
Success | false | true | -1 | 1 | null | 1
;


testPushDownMvExpandPastProject
// https://github.com/elastic/elasticsearch/issues/136596
required_capability: push_down_mv_expand_past_project
from partial_mapping_no_source_sample_data,no_mapping_sample_data
| eval spZehshmg = false, `@timestamp` = null, xrRYYdVC = -1193517579093331092
| rename xrRYYdVC AS `goqRirHJaE`, `@timestamp` AS gilMWmQyQ
| sort goqRirHJaE NULLS LAST
| where true
| limit 14644
| mv_expand `spZehshmg`
| keep goqRirHJaE
;

goqRirHJaE:long
-1193517579093331092
-1193517579093331092
-1193517579093331092
-1193517579093331092
-1193517579093331092
-1193517579093331092
-1193517579093331092
-1193517579093331092
-1193517579093331092
-1193517579093331092
-1193517579093331092
-1193517579093331092
-1193517579093331092
-1193517579093331092
;


testPushDownMvExpandPastProject2
required_capability: push_down_mv_expand_past_project
from languages
| eval a = 2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: maybe we could add some tests where the expanded value is actually an MV, like eval = [1,2].

| rename a AS b
| sort b
| mv_expand b
| keep b
;

b:integer
2
2
2
2
;


testPushDownMvExpandPastProject3
required_capability: push_down_mv_expand_past_project
from languages
| eval a = 2
| rename a AS b
| mv_expand b
| keep b
;

b:integer
2
2
2
2
;
Original file line number Diff line number Diff line change
Expand Up @@ -1688,6 +1688,12 @@ public enum Cap {
* KNN function adds support for k and visit_percentage options
*/
KNN_FUNCTION_OPTIONS_K_VISIT_PERCENTAGE,

/**
* Support for pushing down MV_EXPAND past PROJECT with complex queries.
*/
PUSH_DOWN_MV_EXPAND_PAST_PROJECT,

// Last capability should still have a comma for fewer merge conflicts when adding new ones :)
// This comment prevents the semicolon from being on the previous capability when Spotless formats the file.
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownFilterAndLimitIntoUnionAll;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownInferencePlan;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownJoinPastProject;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownMvExpandPastProject;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownRegexExtract;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushLimitToKnn;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.RemoveStatsOverride;
Expand Down Expand Up @@ -225,6 +226,7 @@ protected static Batch<LogicalPlan> operators() {
new PushDownRegexExtract(),
new PushDownEnrich(),
new PushDownJoinPastProject(),
new PushDownMvExpandPastProject(),
new PushDownAndCombineOrderBy(),
new PushDownFilterAndLimitIntoUnionAll(),
new PruneRedundantOrderBy(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.optimizer.rules.logical;

import org.elasticsearch.xpack.esql.core.expression.Alias;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.AttributeMap;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.plan.logical.MvExpand;
import org.elasticsearch.xpack.esql.plan.logical.Project;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public final class PushDownMvExpandPastProject extends OptimizerRules.OptimizerRule<MvExpand> {
@Override
protected LogicalPlan rule(MvExpand mvExpand) {
if (mvExpand.child() instanceof Project pj) {
List<NamedExpression> projections = new ArrayList<>(pj.projections());

// Skip if any projection alias shadows an input field name, as injecting an Eval would cause
// duplicate output attributes. This can happen with aliases generated by ResolveUnionTypesInUnionAll.
// Example:
// MvExpand[salary{r}#168,salary{r}#175]
// \_Project[[$$salary$converted_to$keyword{r$}#178 AS salary#168]]
// \_UnionAll[[salary{r}#174, $$salary$converted_to$keyword{r$}#178]]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, don't we just need to create a new expanded attribute?

We can create a new (synthetic) reference attribute $$temp$name$#200. Then we'd turn this into

Project[[$$temp$name$#200 AS salary#175]]
\_MvExpand[$$salary$converted_to$keyword{r$}#178,$$temp$name$#200]
  \_UnionAll[[salary{r}#174, $$salary$converted_to$keyword{r$}#178]]

without defensive evals.

With a defensive eval, I think this would be

Project[[$$expanded$temp$name#200 AS salary#175]]
\_MvExpand[$$target$temp$name#199, $$expanded$temp$name#200]
  \_Eval[$$salary$converted_to$keyword{r$}#178 AS $$target$temp$name#199]
    \_UnionAll[[salary{r}#174, $$salary$converted_to$keyword{r$}#178]]

Set<String> inputNames = pj.inputSet().stream().map(NamedExpression::name).collect(Collectors.toSet());
if (projections.stream().anyMatch(e -> e instanceof Alias alias && inputNames.contains(alias.toAttribute().name()))) {
return mvExpand;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is too broad. We only get a problem with duplicate output attributes if

  • the expanded field has the same name as a field in the projections input set, and
  • the projection shadows that specific field from the projection input set.


// Find if the target is aliased in the project and create an alias with temporary names for it.
for (int i = 0; i < projections.size(); i++) {
NamedExpression projection = projections.get(i);
if (projection instanceof Alias alias) {
if (alias.toAttribute().semanticEquals(mvExpand.target().toAttribute())) {
// Check if the alias's original field (child) is referenced elsewhere in the projections.
// If the original field is not referenced by any other projection or alias,
// we don't need to inject an Eval to preserve it, and can safely resolve renames and push down.
Comment on lines +105 to +107
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very helpful comments, btw!

if (projections.stream()
.anyMatch(
ne -> ne.semanticEquals(alias.child())
|| ne instanceof Alias as && as.child().semanticEquals(alias.child()) && as != alias
) == false) {
// The alias's original field is not referenced elsewhere, no need to preserve it,
mvExpand = PushDownUtils.resolveRenamesFromProject(mvExpand, pj);
break;
}

// for query like: row a = 2 | eval b = a | keep * | mv_expand b
Alias aliasAlias = new Alias(
alias.source(),
TemporaryNameUtils.temporaryName(alias.child(), alias.toAttribute(), 0),
alias.child()
);
projections.set(i, mvExpand.expanded());
pj = new Project(pj.source(), new Eval(aliasAlias.source(), pj.child(), List.of(aliasAlias)), projections);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is confusing. The projection at i is the expanded attribute, but this only exists after the mvExpand, so this Project here is inconsistent. This looks like a bug and I don't understand how the code in the next code block (lines 84 and below) works based on this inconsistent state.

It looks to me like the projections list was meant to become the final projections that we put on top of the mv expand. This is also a valid approach, and this code seems to nearly do it - the only thing left to do is to update the projections in the case where the target field is an un-aliased projection.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Due to continuously encountering special cases while pushing down mv_expand past the project, I have always iterated on this branch based on the previous code versions. As a result, even I find this version of the branch quite confusing when trying to recall my original thought process.

I have now entirely refactored the logic based on your suggestions, and I believe the new version is significantly cleaner and easier to understand.

mvExpand = new MvExpand(mvExpand.source(), pj, aliasAlias.toAttribute(), mvExpand.expanded());
break;
} else if (alias.child().semanticEquals(mvExpand.target().toAttribute())) {
// for query like: row a = 2 | eval b = a | keep * | mv_expand a
Alias aliasAlias = new Alias(
alias.source(),
TemporaryNameUtils.temporaryName(alias.child(), alias.toAttribute(), 0),
alias.child()
);
projections.set(i, alias.replaceChild(aliasAlias.toAttribute()));
pj = new Project(pj.source(), new Eval(aliasAlias.source(), pj.child(), List.of(aliasAlias)), projections);
mvExpand = mvExpand.replaceChild(pj);
break;
}
}
}

// Build a map of aliases from the original projection
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The projection pj can be updated by the code above. I can't verify that using the aliases from pj is the right thing here.

AttributeMap.Builder<Alias> aliasBuilder = AttributeMap.builder();
pj.forEachExpression(Alias.class, alias -> aliasBuilder.put(alias.toAttribute(), alias));
AttributeMap<Alias> aliases = aliasBuilder.build();

// Push down the MvExpand past the Project
MvExpand pushedDownMvExpand = mvExpand.replaceChild(pj.child());

// Create a new projection at the top based on mvExpand.output(), plugging back in the aliases
List<NamedExpression> newProjections = new ArrayList<>();
for (Attribute outputExpr : mvExpand.output()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, mvExpand has been mutated before reaching here. I can't say why this would be correct to use.

Alias alias = aliases.resolve(outputExpr, null);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't normally use resolve for an AttributeMap<Alias>. It's really meant for AttributeMap<Attribute> because it can deal with multiple indirections. We don't do that here, so we should rather just use get.

if (alias == null) {
// No alias, use the output expression directly
newProjections.add(outputExpr);
} else if (alias.child().semanticEquals(mvExpand.target().toAttribute())) {
// Alias child is the target attribute, replace it with the expanded attribute
newProjections.add(alias.replaceChild(mvExpand.expanded()));
} else {
// Keep the alias as is
newProjections.add(alias);
}
}

return new Project(pj.source(), pushedDownMvExpand, newProjections);
}
return mvExpand;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ public static Project pushDownPastProject(UnaryPlan parent) {
}
}

private static <P extends LogicalPlan> P resolveRenamesFromProject(P plan, Project project) {
public static <P extends LogicalPlan> P resolveRenamesFromProject(P plan, Project project) {
AttributeMap.Builder<Expression> aliasBuilder = AttributeMap.builder();
project.forEachExpression(Alias.class, a -> aliasBuilder.put(a.toAttribute(), a.child()));
var aliases = aliasBuilder.build();
Expand Down
Loading