Skip to content

Commit de42ba3

Browse files
ES|QL: fix join masking eval (#126614)
1 parent e030e44 commit de42ba3

File tree

6 files changed

+211
-46
lines changed

6 files changed

+211
-46
lines changed

docs/changelog/126614.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 126614
2+
summary: Fix join masking eval
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
5555
"Unknown column \\[.*\\]", // https://github.com/elastic/elasticsearch/issues/126026
5656
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
5757
"No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418
58-
"JOIN left field .* is incompatible with right field", // https://github.com/elastic/elasticsearch/issues/126419
59-
"Unsupported type .* for enrich", // most likely still https://github.com/elastic/elasticsearch/issues/126419
6058
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
6159
);
6260

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1586,3 +1586,45 @@ from *
15861586
salary_change.long:double|foo:long
15871587
5.0 |1698069301543123456
15881588
;
1589+
1590+
1591+
joinMaskingEval
1592+
required_capability: join_lookup_v12
1593+
required_capability: fix_join_masking_eval
1594+
from languag*
1595+
| eval type = null
1596+
| rename language_name as message
1597+
| lookup join message_types_lookup on message
1598+
| rename type as message
1599+
| lookup join message_types_lookup on message
1600+
| keep `language.name`
1601+
;
1602+
1603+
ignoreOrder:true
1604+
language.name:text
1605+
null
1606+
null
1607+
null
1608+
null
1609+
null
1610+
null
1611+
null
1612+
null
1613+
null
1614+
null
1615+
null
1616+
null
1617+
null
1618+
null
1619+
null
1620+
null
1621+
null
1622+
null
1623+
null
1624+
null
1625+
null
1626+
English
1627+
French
1628+
Spanish
1629+
German
1630+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1005,7 +1005,13 @@ public enum Cap {
10051005
/**
10061006
* Support loading of ip fields if they are not indexed.
10071007
*/
1008-
LOADING_NON_INDEXED_IP_FIELDS;
1008+
LOADING_NON_INDEXED_IP_FIELDS,
1009+
1010+
/**
1011+
* During resolution (pre-analysis) we have to consider that joins or enriches can override EVALuated values
1012+
* https://github.com/elastic/elasticsearch/issues/126419
1013+
*/
1014+
FIX_JOIN_MASKING_EVAL;
10091015

10101016
private final boolean enabled;
10111017

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

Lines changed: 70 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,24 @@
6060
import org.elasticsearch.xpack.esql.parser.QueryParams;
6161
import org.elasticsearch.xpack.esql.plan.IndexPattern;
6262
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
63+
import org.elasticsearch.xpack.esql.plan.logical.Drop;
6364
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
65+
import org.elasticsearch.xpack.esql.plan.logical.Eval;
66+
import org.elasticsearch.xpack.esql.plan.logical.Filter;
6467
import org.elasticsearch.xpack.esql.plan.logical.Fork;
68+
import org.elasticsearch.xpack.esql.plan.logical.InlineStats;
69+
import org.elasticsearch.xpack.esql.plan.logical.Insist;
6570
import org.elasticsearch.xpack.esql.plan.logical.Keep;
71+
import org.elasticsearch.xpack.esql.plan.logical.Limit;
6672
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
73+
import org.elasticsearch.xpack.esql.plan.logical.MvExpand;
74+
import org.elasticsearch.xpack.esql.plan.logical.OrderBy;
6775
import org.elasticsearch.xpack.esql.plan.logical.Project;
6876
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
77+
import org.elasticsearch.xpack.esql.plan.logical.Rename;
78+
import org.elasticsearch.xpack.esql.plan.logical.TopN;
6979
import org.elasticsearch.xpack.esql.plan.logical.UnresolvedRelation;
80+
import org.elasticsearch.xpack.esql.plan.logical.inference.Completion;
7081
import org.elasticsearch.xpack.esql.plan.logical.inference.InferencePlan;
7182
import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
7283
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes;
@@ -500,6 +511,7 @@ private void preAnalyzeMainIndices(
500511

501512
/**
502513
* Check if there are any clusters to search.
514+
*
503515
* @return true if there are no clusters to search, false otherwise
504516
*/
505517
private boolean allCCSClustersSkipped(
@@ -612,6 +624,8 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
612624
var keepJoinRefsBuilder = AttributeSet.builder();
613625
Set<String> wildcardJoinIndices = new java.util.HashSet<>();
614626

627+
boolean[] canRemoveAliases = new boolean[] { true };
628+
615629
parsed.forEachDown(p -> {// go over each plan top-down
616630
if (p instanceof RegexExtract re) { // for Grok and Dissect
617631
// remove other down-the-tree references to the extracted fields
@@ -657,20 +671,37 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
657671
}
658672
}
659673

660-
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
661-
// for example "from test | eval x = salary | stats max = max(x) by gender"
662-
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
663-
AttributeSet planRefs = p.references();
664-
Set<String> fieldNames = planRefs.names();
665-
p.forEachExpressionDown(Alias.class, alias -> {
666-
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id = id"
667-
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
668-
if (fieldNames.contains(alias.name())) {
669-
return;
670-
}
671-
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
672-
});
674+
// If the current node in the tree is of type JOIN (lookup join, inlinestats) or ENRICH or other type of
675+
// command that we may add in the future which can override already defined Aliases with EVAL
676+
// (for example
677+
//
678+
// from test
679+
// | eval ip = 123
680+
// | enrich ips_policy ON hostname
681+
// | rename ip AS my_ip
682+
//
683+
// and ips_policy enriches the results with the same name ip field),
684+
// these aliases should be kept in the list of fields.
685+
if (canRemoveAliases[0] && couldOverrideAliases(p)) {
686+
canRemoveAliases[0] = false;
687+
}
688+
if (canRemoveAliases[0]) {
689+
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
690+
// for example "from test | eval x = salary | stats max = max(x) by gender"
691+
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
692+
AttributeSet planRefs = p.references();
693+
Set<String> fieldNames = planRefs.names();
694+
p.forEachExpressionDown(Alias.class, alias -> {
695+
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
696+
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
697+
if (fieldNames.contains(alias.name())) {
698+
return;
699+
}
700+
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
701+
});
702+
}
673703
});
704+
674705
// Add JOIN ON column references afterward to avoid Alias removal
675706
referencesBuilder.addAll(keepJoinRefsBuilder);
676707
// If any JOIN commands need wildcard field-caps calls, persist the index names
@@ -694,6 +725,32 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
694725
}
695726
}
696727

728+
/**
729+
* Could a plan "accidentally" override aliases?
730+
* Examples are JOIN and ENRICH, that _could_ produce fields with the same
731+
* name of an existing alias, based on their index mapping.
732+
* Here we just have to consider commands where this information is not available before index resolution,
733+
* eg. EVAL, GROK, DISSECT can override an alias, but we know it in advance, ie. we don't need to resolve indices to know.
734+
*/
735+
private static boolean couldOverrideAliases(LogicalPlan p) {
736+
return (p instanceof Aggregate
737+
|| p instanceof Completion
738+
|| p instanceof Drop
739+
|| p instanceof Eval
740+
|| p instanceof Filter
741+
|| p instanceof Fork
742+
|| p instanceof InlineStats
743+
|| p instanceof Insist
744+
|| p instanceof Keep
745+
|| p instanceof Limit
746+
|| p instanceof MvExpand
747+
|| p instanceof OrderBy
748+
|| p instanceof Project
749+
|| p instanceof RegexExtract
750+
|| p instanceof Rename
751+
|| p instanceof TopN) == false;
752+
}
753+
697754
private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {
698755
boolean isPattern = Regex.isSimpleMatchPattern(attr.name());
699756
if (skipIfPattern && isPattern) {

0 commit comments

Comments
 (0)