Skip to content

Commit d7b78dd

Browse files
ES|QL: fix join masking eval (#126614) (#126860)
1 parent a9aef25 commit d7b78dd

File tree

6 files changed

+213
-46
lines changed

6 files changed

+213
-46
lines changed

docs/changelog/126614.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 126614
2+
summary: Fix join masking eval
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,13 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
4444
// Awaiting fixes
4545
"estimated row size \\[0\\] wasn't set", // https://github.com/elastic/elasticsearch/issues/121739
4646
"unknown physical plan node \\[OrderExec\\]", // https://github.com/elastic/elasticsearch/issues/120817
47-
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741
48-
//
47+
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741,
48+
"Plan \\[ProjectExec\\[\\[<no-fields>.* optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/125866
49+
"only supports KEYWORD or TEXT values, found expression", // https://github.com/elastic/elasticsearch/issues/126017
50+
"token recognition error at: '``", // https://github.com/elastic/elasticsearch/issues/125870
51+
"Unknown column \\[.*\\]", // https://github.com/elastic/elasticsearch/issues/126026
52+
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
53+
"No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418
4954
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
5055
);
5156

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1502,3 +1502,45 @@ from *
15021502
salary_change.long:double|foo:long
15031503
5.0 |1698069301543123456
15041504
;
1505+
1506+
1507+
joinMaskingEval
1508+
required_capability: join_lookup_v12
1509+
required_capability: fix_join_masking_eval
1510+
from languag*
1511+
| eval type = null
1512+
| rename language_name as message
1513+
| lookup join message_types_lookup on message
1514+
| rename type as message
1515+
| lookup join message_types_lookup on message
1516+
| keep `language.name`
1517+
;
1518+
1519+
ignoreOrder:true
1520+
language.name:text
1521+
null
1522+
null
1523+
null
1524+
null
1525+
null
1526+
null
1527+
null
1528+
null
1529+
null
1530+
null
1531+
null
1532+
null
1533+
null
1534+
null
1535+
null
1536+
null
1537+
null
1538+
null
1539+
null
1540+
null
1541+
null
1542+
English
1543+
French
1544+
Spanish
1545+
German
1546+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -799,7 +799,13 @@ public enum Cap {
799799
/**
800800
* Support loading of ip fields if they are not indexed.
801801
*/
802-
LOADING_NON_INDEXED_IP_FIELDS;
802+
LOADING_NON_INDEXED_IP_FIELDS,
803+
804+
/**
805+
* During resolution (pre-analysis) we have to consider that joins or enriches can override EVALuated values
806+
* https://github.com/elastic/elasticsearch/issues/126419
807+
*/
808+
FIX_JOIN_MASKING_EVAL;
803809

804810
private final boolean enabled;
805811

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

Lines changed: 65 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,20 @@
5656
import org.elasticsearch.xpack.esql.parser.QueryParams;
5757
import org.elasticsearch.xpack.esql.plan.IndexPattern;
5858
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
59+
import org.elasticsearch.xpack.esql.plan.logical.Drop;
5960
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
61+
import org.elasticsearch.xpack.esql.plan.logical.Eval;
62+
import org.elasticsearch.xpack.esql.plan.logical.Filter;
63+
import org.elasticsearch.xpack.esql.plan.logical.InlineStats;
6064
import org.elasticsearch.xpack.esql.plan.logical.Keep;
65+
import org.elasticsearch.xpack.esql.plan.logical.Limit;
6166
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
67+
import org.elasticsearch.xpack.esql.plan.logical.MvExpand;
68+
import org.elasticsearch.xpack.esql.plan.logical.OrderBy;
6269
import org.elasticsearch.xpack.esql.plan.logical.Project;
6370
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
71+
import org.elasticsearch.xpack.esql.plan.logical.Rename;
72+
import org.elasticsearch.xpack.esql.plan.logical.TopN;
6473
import org.elasticsearch.xpack.esql.plan.logical.UnresolvedRelation;
6574
import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
6675
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes;
@@ -443,6 +452,7 @@ private void preAnalyzeIndices(
443452

444453
/**
445454
* Check if there are any clusters to search.
455+
*
446456
* @return true if there are no clusters to search, false otherwise
447457
*/
448458
private boolean allCCSClustersSkipped(
@@ -542,6 +552,8 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
542552
var keepJoinRefsBuilder = AttributeSet.builder();
543553
Set<String> wildcardJoinIndices = new java.util.HashSet<>();
544554

555+
boolean[] canRemoveAliases = new boolean[] { true };
556+
545557
parsed.forEachDown(p -> {// go over each plan top-down
546558
if (p instanceof RegexExtract re) { // for Grok and Dissect
547559
// remove other down-the-tree references to the extracted fields
@@ -587,20 +599,37 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
587599
}
588600
}
589601

590-
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
591-
// for example "from test | eval x = salary | stats max = max(x) by gender"
592-
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
593-
AttributeSet planRefs = p.references();
594-
Set<String> fieldNames = planRefs.names();
595-
p.forEachExpressionDown(Alias.class, alias -> {
596-
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id = id"
597-
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
598-
if (fieldNames.contains(alias.name())) {
599-
return;
600-
}
601-
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
602-
});
602+
// If the current node in the tree is of type JOIN (lookup join, inlinestats) or ENRICH or other type of
603+
// command that we may add in the future which can override already defined Aliases with EVAL
604+
// (for example
605+
//
606+
// from test
607+
// | eval ip = 123
608+
// | enrich ips_policy ON hostname
609+
// | rename ip AS my_ip
610+
//
611+
// and ips_policy enriches the results with the same name ip field),
612+
// these aliases should be kept in the list of fields.
613+
if (canRemoveAliases[0] && couldOverrideAliases(p)) {
614+
canRemoveAliases[0] = false;
615+
}
616+
if (canRemoveAliases[0]) {
617+
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
618+
// for example "from test | eval x = salary | stats max = max(x) by gender"
619+
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
620+
AttributeSet planRefs = p.references();
621+
Set<String> fieldNames = planRefs.names();
622+
p.forEachExpressionDown(Alias.class, alias -> {
623+
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
624+
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
625+
if (fieldNames.contains(alias.name())) {
626+
return;
627+
}
628+
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
629+
});
630+
}
603631
});
632+
604633
// Add JOIN ON column references afterward to avoid Alias removal
605634
referencesBuilder.addAll(keepJoinRefsBuilder);
606635
// If any JOIN commands need wildcard field-caps calls, persist the index names
@@ -624,6 +653,29 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
624653
}
625654
}
626655

656+
/**
657+
* Could a plan "accidentally" override aliases?
658+
* Examples are JOIN and ENRICH, that _could_ produce fields with the same
659+
* name of an existing alias, based on their index mapping.
660+
* Here we just have to consider commands where this information is not available before index resolution,
661+
* eg. EVAL, GROK, DISSECT can override an alias, but we know it in advance, ie. we don't need to resolve indices to know.
662+
*/
663+
private static boolean couldOverrideAliases(LogicalPlan p) {
664+
return (p instanceof Aggregate
665+
|| p instanceof Drop
666+
|| p instanceof Eval
667+
|| p instanceof Filter
668+
|| p instanceof InlineStats
669+
|| p instanceof Keep
670+
|| p instanceof Limit
671+
|| p instanceof MvExpand
672+
|| p instanceof OrderBy
673+
|| p instanceof Project
674+
|| p instanceof RegexExtract
675+
|| p instanceof Rename
676+
|| p instanceof TopN) == false;
677+
}
678+
627679
private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {
628680
boolean isPattern = Regex.isSimpleMatchPattern(attr.name());
629681
if (skipIfPattern && isPattern) {

0 commit comments

Comments
 (0)