From 873d2148810361d5bc7b75dba658c038c3d32fa1 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Thu, 24 Apr 2025 12:06:20 +0200 Subject: [PATCH] ESQL: Have CombineProjections propagate references upwards (#127264) This will have CombineProjections allow references from the "under" plan be kept when merging stacked Projections. This is to prevent field attributes that are dropped by ReplaceMissingFieldWithNull be used in the resulting plan. (cherry picked from commit 9626fe54ebe38a6142a0b65816febded7e881423) --- .../xpack/esql/core/expression/Alias.java | 3 +- .../rules/logical/CombineProjections.java | 3 ++ .../local/ReplaceMissingFieldWithNull.java | 5 ++- .../LocalLogicalPlanOptimizerTests.java | 43 +++++++++++++++++-- .../LocalPhysicalPlanOptimizerTests.java | 19 ++++---- 5 files changed, 57 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java index 1f7d03ba9d905..7195fdfb933a1 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java @@ -93,7 +93,8 @@ protected NodeInfo info() { } public Alias replaceChild(Expression child) { - return new Alias(source(), name(), child, id(), synthetic()); + // these "nop" replacements can occur on attribute map resolutions having the default return same as the lookup key + return child == this.child ? this : new Alias(source(), name(), child, id(), synthetic()); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java index 0371510d6f306..87e70765d012f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java @@ -15,6 +15,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; @@ -128,6 +129,8 @@ private static List combineProjections(List replaced = new ArrayList<>(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index 0441539a92a82..b9a81aa949ab3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -58,7 +58,8 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog // Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead. // Also retain fields from lookup indices because we do not have stats for these. Predicate shouldBeRetained = f -> f.field() instanceof PotentiallyUnmappedKeywordEsField - || (localLogicalOptimizerContext.searchStats().exists(f.fieldName()) || lookupFieldsBuilder.contains(f)); + || localLogicalOptimizerContext.searchStats().exists(f.fieldName()) + || lookupFieldsBuilder.contains(f); return plan.transformUp(p -> missingToNull(p, shouldBeRetained)); } @@ -77,7 +78,7 @@ private LogicalPlan missingToNull(LogicalPlan plan, Predicate sh for (int i = 0, size = relationOutput.size(); i < size; i++) { Attribute attr = relationOutput.get(i); NamedExpression projection; - if (attr instanceof FieldAttribute f && (shouldBeRetained.test(f) == false)) { + if (attr instanceof FieldAttribute f && shouldBeRetained.test(f) == false) { DataType dt = f.dataType(); Alias nullAlias = nullLiterals.get(dt); // save the first field as null (per datatype) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java index 6903e5dfce35d..1d5da09dea417 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.optimizer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.util.Maps; import org.elasticsearch.index.IndexMode; @@ -22,6 +23,7 @@ import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -141,10 +143,10 @@ public void testMissingFieldInFilterString() { /** * Expects - * Project[[last_name{f}#6]] + * Project[[last_name{r}#7]] * \_Eval[[null[KEYWORD] AS last_name]] - * \_Limit[10000[INTEGER]] - * \_EsRelation[test][_meta_field{f}#8, emp_no{f}#2, first_name{f}#3, gen..] + * \_Limit[1000[INTEGER],false] + * \_EsRelation[test][_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gen..] */ public void testMissingFieldInProject() { var plan = plan(""" @@ -158,7 +160,7 @@ public void testMissingFieldInProject() { var project = as(localPlan, Project.class); var projections = project.projections(); assertThat(Expressions.names(projections), contains("last_name")); - as(projections.get(0), FieldAttribute.class); + as(projections.get(0), ReferenceAttribute.class); var eval = as(project.child(), Eval.class); assertThat(Expressions.names(eval.fields()), contains("last_name")); var alias = as(eval.fields().get(0), Alias.class); @@ -171,6 +173,39 @@ public void testMissingFieldInProject() { assertThat(Expressions.names(source.output()), not(contains("last_name"))); } + /* + * Expects a similar plan to testMissingFieldInProject() above, except for the Alias's child value + * Project[[last_name{r}#4]] + * \_Eval[[[66 6f 6f][KEYWORD] AS last_name]] + * \_Limit[1000[INTEGER],false] + * \_EsRelation[test][_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, ge..] + */ + public void testReassignedMissingFieldInProject() { + var plan = plan(""" + from test + | keep last_name + | eval last_name = "foo" + """); + + var testStats = statsForMissingField("last_name"); + var localPlan = localPlan(plan, testStats); + + var project = as(localPlan, Project.class); + var projections = project.projections(); + assertThat(Expressions.names(projections), contains("last_name")); + as(projections.get(0), ReferenceAttribute.class); + var eval = as(project.child(), Eval.class); + assertThat(Expressions.names(eval.fields()), contains("last_name")); + var alias = as(eval.fields().get(0), Alias.class); + var literal = as(alias.child(), Literal.class); + assertThat(literal.value(), is(new BytesRef("foo"))); + assertThat(literal.dataType(), is(DataType.KEYWORD)); + + var limit = as(eval.child(), Limit.class); + var source = as(limit.child(), EsRelation.class); + assertThat(Expressions.names(source.output()), not(contains("last_name"))); + } + /** * Expects * EsqlProject[[first_name{f}#4]] diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index cb76e5de23ac5..f511035dae50f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -38,8 +38,8 @@ import org.elasticsearch.xpack.esql.analysis.Verifier; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Expressions; -import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; @@ -1305,15 +1305,16 @@ private EsQueryExec doTestOutOfRangeFilterPushdown(String query, Analyzer analyz return luceneQuery; } - /** + /* * Expects * LimitExec[1000[INTEGER]] - * \_ExchangeExec[[],false] - * \_ProjectExec[[_meta_field{f}#8, emp_no{r}#2, first_name{r}#3, gender{f}#4, job{f}#9, job.raw{f}#10, languages{f}#5, first_n - * ame{r}#3 AS last_name, long_noidx{f}#11, emp_no{r}#2 AS salary]] - * \_FieldExtractExec[_meta_field{f}#8, gender{f}#4, job{f}#9, job.raw{f}..] + * \_ExchangeExec[[_meta_field{f}#8, emp_no{f}#2, first_name{f}#3, gender{f}#4, hire_date{f}#9, job{f}#10, job.raw{f}#11, langua + * ges{f}#5, last_name{f}#6, long_noidx{f}#12, salary{f}#7],false] + * \_ProjectExec[[_meta_field{f}#8, emp_no{r}#2, first_name{r}#3, gender{f}#4, hire_date{f}#9, job{f}#10, job.raw{f}#11, langua + * ges{f}#5, first_name{r}#3 AS last_name, long_noidx{f}#12, emp_no{r}#2 AS salary]] + * \_FieldExtractExec[_meta_field{f}#8, gender{f}#4, hire_date{f}#9, job{..]<[],[]> * \_EvalExec[[null[INTEGER] AS emp_no, null[KEYWORD] AS first_name]] - * \_EsQueryExec[test], query[][_doc{f}#12], limit[1000], sort[] estimatedRowSize[270] + * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#13], limit[1000], sort[] estimatedRowSize[278] */ public void testMissingFieldsDoNotGetExtracted() { var stats = EsqlTestUtils.statsForMissingField("first_name", "last_name", "emp_no", "salary"); @@ -1340,9 +1341,9 @@ public void testMissingFieldsDoNotGetExtracted() { ) ); // emp_no - assertThat(projections.get(1), instanceOf(FieldAttribute.class)); + assertThat(projections.get(1), instanceOf(ReferenceAttribute.class)); // first_name - assertThat(projections.get(2), instanceOf(FieldAttribute.class)); + assertThat(projections.get(2), instanceOf(ReferenceAttribute.class)); // last_name --> first_name var nullAlias = Alias.unwrap(projections.get(8));