Skip to content

Commit fcb87b5

Browse files
ES|QL: fix join masking eval (#126614)
1 parent 39f026a commit fcb87b5

File tree

6 files changed

+214
-46
lines changed

6 files changed

+214
-46
lines changed

docs/changelog/126614.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 126614
2+
summary: Fix join masking eval
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,13 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
4444
// Awaiting fixes
4545
"estimated row size \\[0\\] wasn't set", // https://github.com/elastic/elasticsearch/issues/121739
4646
"unknown physical plan node \\[OrderExec\\]", // https://github.com/elastic/elasticsearch/issues/120817
47-
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741
48-
//
47+
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741,
48+
"Plan \\[ProjectExec\\[\\[<no-fields>.* optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/125866
49+
"only supports KEYWORD or TEXT values, found expression", // https://github.com/elastic/elasticsearch/issues/126017
50+
"token recognition error at: '``", // https://github.com/elastic/elasticsearch/issues/125870
51+
"Unknown column \\[.*\\]", // https://github.com/elastic/elasticsearch/issues/126026
52+
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
53+
"No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418
4954
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
5055
);
5156

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,3 +1527,45 @@ from *
15271527
salary_change.long:double|foo:long
15281528
5.0 |1698069301543123456
15291529
;
1530+
1531+
1532+
joinMaskingEval
1533+
required_capability: join_lookup_v12
1534+
required_capability: fix_join_masking_eval
1535+
from languag*
1536+
| eval type = null
1537+
| rename language_name as message
1538+
| lookup join message_types_lookup on message
1539+
| rename type as message
1540+
| lookup join message_types_lookup on message
1541+
| keep `language.name`
1542+
;
1543+
1544+
ignoreOrder:true
1545+
language.name:text
1546+
null
1547+
null
1548+
null
1549+
null
1550+
null
1551+
null
1552+
null
1553+
null
1554+
null
1555+
null
1556+
null
1557+
null
1558+
null
1559+
null
1560+
null
1561+
null
1562+
null
1563+
null
1564+
null
1565+
null
1566+
null
1567+
English
1568+
French
1569+
Spanish
1570+
German
1571+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,13 @@ public enum Cap {
877877
* the ownership of that block - but didn't account for the fact that the caller might close it, leading to double releases
878878
* in some union type queries. C.f. https://github.com/elastic/elasticsearch/issues/125850
879879
*/
880-
FIX_DOUBLY_RELEASED_NULL_BLOCKS_IN_VALUESOURCEREADER;
880+
FIX_DOUBLY_RELEASED_NULL_BLOCKS_IN_VALUESOURCEREADER,
881+
882+
/**
883+
* During resolution (pre-analysis) we have to consider that joins or enriches can override EVALuated values
884+
* https://github.com/elastic/elasticsearch/issues/126419
885+
*/
886+
FIX_JOIN_MASKING_EVAL;
881887

882888
private final boolean enabled;
883889

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

Lines changed: 66 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,21 @@
5858
import org.elasticsearch.xpack.esql.parser.QueryParams;
5959
import org.elasticsearch.xpack.esql.plan.IndexPattern;
6060
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
61+
import org.elasticsearch.xpack.esql.plan.logical.Drop;
6162
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
63+
import org.elasticsearch.xpack.esql.plan.logical.Eval;
64+
import org.elasticsearch.xpack.esql.plan.logical.Filter;
65+
import org.elasticsearch.xpack.esql.plan.logical.InlineStats;
66+
import org.elasticsearch.xpack.esql.plan.logical.Insist;
6267
import org.elasticsearch.xpack.esql.plan.logical.Keep;
68+
import org.elasticsearch.xpack.esql.plan.logical.Limit;
6369
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
70+
import org.elasticsearch.xpack.esql.plan.logical.MvExpand;
71+
import org.elasticsearch.xpack.esql.plan.logical.OrderBy;
6472
import org.elasticsearch.xpack.esql.plan.logical.Project;
6573
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
74+
import org.elasticsearch.xpack.esql.plan.logical.Rename;
75+
import org.elasticsearch.xpack.esql.plan.logical.TopN;
6676
import org.elasticsearch.xpack.esql.plan.logical.UnresolvedRelation;
6777
import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
6878
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes;
@@ -574,6 +584,8 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
574584
var keepJoinRefsBuilder = AttributeSet.builder();
575585
Set<String> wildcardJoinIndices = new java.util.HashSet<>();
576586

587+
boolean[] canRemoveAliases = new boolean[] { true };
588+
577589
parsed.forEachDown(p -> {// go over each plan top-down
578590
if (p instanceof RegexExtract re) { // for Grok and Dissect
579591
// remove other down-the-tree references to the extracted fields
@@ -619,20 +631,37 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
619631
}
620632
}
621633

622-
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
623-
// for example "from test | eval x = salary | stats max = max(x) by gender"
624-
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
625-
AttributeSet planRefs = p.references();
626-
Set<String> fieldNames = planRefs.names();
627-
p.forEachExpressionDown(Alias.class, alias -> {
628-
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id = id"
629-
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
630-
if (fieldNames.contains(alias.name())) {
631-
return;
632-
}
633-
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
634-
});
634+
// If the current node in the tree is of type JOIN (lookup join, inlinestats) or ENRICH or other type of
635+
// command that we may add in the future which can override already defined Aliases with EVAL
636+
// (for example
637+
//
638+
// from test
639+
// | eval ip = 123
640+
// | enrich ips_policy ON hostname
641+
// | rename ip AS my_ip
642+
//
643+
// and ips_policy enriches the results with the same name ip field),
644+
// these aliases should be kept in the list of fields.
645+
if (canRemoveAliases[0] && couldOverrideAliases(p)) {
646+
canRemoveAliases[0] = false;
647+
}
648+
if (canRemoveAliases[0]) {
649+
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
650+
// for example "from test | eval x = salary | stats max = max(x) by gender"
651+
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
652+
AttributeSet planRefs = p.references();
653+
Set<String> fieldNames = planRefs.names();
654+
p.forEachExpressionDown(Alias.class, alias -> {
655+
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
656+
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
657+
if (fieldNames.contains(alias.name())) {
658+
return;
659+
}
660+
referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
661+
});
662+
}
635663
});
664+
636665
// Add JOIN ON column references afterward to avoid Alias removal
637666
referencesBuilder.addAll(keepJoinRefsBuilder);
638667
// If any JOIN commands need wildcard field-caps calls, persist the index names
@@ -656,6 +685,30 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
656685
}
657686
}
658687

688+
/**
689+
* Could a plan "accidentally" override aliases?
690+
* Examples are JOIN and ENRICH, that _could_ produce fields with the same
691+
* name of an existing alias, based on their index mapping.
692+
* Here we just have to consider commands where this information is not available before index resolution,
693+
* eg. EVAL, GROK, DISSECT can override an alias, but we know it in advance, ie. we don't need to resolve indices to know.
694+
*/
695+
private static boolean couldOverrideAliases(LogicalPlan p) {
696+
return (p instanceof Aggregate
697+
|| p instanceof Drop
698+
|| p instanceof Eval
699+
|| p instanceof Filter
700+
|| p instanceof InlineStats
701+
|| p instanceof Insist
702+
|| p instanceof Keep
703+
|| p instanceof Limit
704+
|| p instanceof MvExpand
705+
|| p instanceof OrderBy
706+
|| p instanceof Project
707+
|| p instanceof RegexExtract
708+
|| p instanceof Rename
709+
|| p instanceof TopN) == false;
710+
}
711+
659712
private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {
660713
boolean isPattern = Regex.isSimpleMatchPattern(attr.name());
661714
if (skipIfPattern && isPattern) {

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java

Lines changed: 87 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -478,13 +478,16 @@ public void testDropAllColumns_WithStats() {
478478
}
479479

480480
public void testEnrichOn() {
481-
assertFieldNames("""
482-
from employees
483-
| sort emp_no
484-
| limit 1
485-
| eval x = to_string(languages)
486-
| enrich languages_policy on x
487-
| keep emp_no, language_name""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*"));
481+
assertFieldNames(
482+
"""
483+
from employees
484+
| sort emp_no
485+
| limit 1
486+
| eval x = to_string(languages)
487+
| enrich languages_policy on x
488+
| keep emp_no, language_name""",
489+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
490+
);
488491
}
489492

490493
public void testEnrichOn2() {
@@ -494,7 +497,7 @@ public void testEnrichOn2() {
494497
| enrich languages_policy on x
495498
| keep emp_no, language_name
496499
| sort emp_no
497-
| limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*"));
500+
| limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*"));
498501
}
499502

500503
public void testUselessEnrich() {
@@ -512,15 +515,15 @@ public void testSimpleSortLimit() {
512515
| enrich languages_policy on x
513516
| keep emp_no, language_name
514517
| sort emp_no
515-
| limit 1""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*"));
518+
| limit 1""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*"));
516519
}
517520

518521
public void testWith() {
519522
assertFieldNames(
520523
"""
521524
from employees | eval x = to_string(languages) | keep emp_no, x | sort emp_no | limit 1
522525
| enrich languages_policy on x with language_name""",
523-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
526+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
524527
);
525528
}
526529

@@ -529,7 +532,7 @@ public void testWithAlias() {
529532
"""
530533
from employees | sort emp_no | limit 3 | eval x = to_string(languages) | keep emp_no, x
531534
| enrich languages_policy on x with lang = language_name""",
532-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
535+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
533536
);
534537
}
535538

@@ -538,7 +541,7 @@ public void testWithAliasSort() {
538541
"""
539542
from employees | eval x = to_string(languages) | keep emp_no, x | sort emp_no | limit 3
540543
| enrich languages_policy on x with lang = language_name""",
541-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
544+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
542545
);
543546
}
544547

@@ -547,7 +550,7 @@ public void testWithAliasAndPlain() {
547550
"""
548551
from employees | sort emp_no desc | limit 3 | eval x = to_string(languages) | keep emp_no, x
549552
| enrich languages_policy on x with lang = language_name, language_name""",
550-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
553+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
551554
);
552555
}
553556

@@ -556,7 +559,7 @@ public void testWithTwoAliasesSameProp() {
556559
"""
557560
from employees | sort emp_no | limit 1 | eval x = to_string(languages) | keep emp_no, x
558561
| enrich languages_policy on x with lang = language_name, lang2 = language_name""",
559-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
562+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
560563
);
561564
}
562565

@@ -565,7 +568,7 @@ public void testRedundantWith() {
565568
"""
566569
from employees | sort emp_no | limit 1 | eval x = to_string(languages) | keep emp_no, x
567570
| enrich languages_policy on x with language_name, language_name""",
568-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
571+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
569572
);
570573
}
571574

@@ -588,28 +591,34 @@ public void testConstantNullInput() {
588591
| eval x = to_string(languages)
589592
| keep emp_no, x
590593
| enrich languages_policy on x with language_name, language_name""",
591-
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*")
594+
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*")
592595
);
593596
}
594597

595598
public void testEnrichEval() {
596-
assertFieldNames("""
597-
from employees
598-
| eval x = to_string(languages)
599-
| enrich languages_policy on x with lang = language_name
600-
| eval language = concat(x, "-", lang)
601-
| keep emp_no, x, lang, language
602-
| sort emp_no desc | limit 3""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*"));
599+
assertFieldNames(
600+
"""
601+
from employees
602+
| eval x = to_string(languages)
603+
| enrich languages_policy on x with lang = language_name
604+
| eval language = concat(x, "-", lang)
605+
| keep emp_no, x, lang, language
606+
| sort emp_no desc | limit 3""",
607+
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*", "lang", "lang.*")
608+
);
603609
}
604610

605611
public void testSimple() {
606-
assertFieldNames("""
607-
from employees
608-
| eval x = 1, y = to_string(languages)
609-
| enrich languages_policy on y
610-
| where x > 1
611-
| keep emp_no, language_name
612-
| limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*"));
612+
assertFieldNames(
613+
"""
614+
from employees
615+
| eval x = 1, y = to_string(languages)
616+
| enrich languages_policy on y
617+
| where x > 1
618+
| keep emp_no, language_name
619+
| limit 1""",
620+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "y", "x.*", "y.*")
621+
);
613622
}
614623

615624
public void testEvalNullSort() {
@@ -1653,6 +1662,54 @@ public void testInsist_multiFieldMappedMultiIndex() {
16531662
);
16541663
}
16551664

1665+
public void testJoinMaskingKeep() {
1666+
assertFieldNames(
1667+
"""
1668+
from languag*
1669+
| eval type = null
1670+
| rename language_name as message
1671+
| lookup join message_types_lookup on message
1672+
| rename type as message
1673+
| lookup join message_types_lookup on message
1674+
| keep `language.name`""",
1675+
Set.of("language.name", "type", "language_name", "message", "language_name.*", "message.*", "type.*", "language.name.*")
1676+
);
1677+
}
1678+
1679+
public void testJoinMaskingKeep2() {
1680+
assertFieldNames("""
1681+
from languag*
1682+
| eval type = "foo"
1683+
| rename type as message
1684+
| lookup join message_types_lookup on message
1685+
| rename type as message
1686+
| lookup join message_types_lookup on message
1687+
| keep `language.name`""", Set.of("language.name", "type", "message", "message.*", "type.*", "language.name.*"));
1688+
}
1689+
1690+
public void testEnrichMaskingEvalOn() {
1691+
assertFieldNames("""
1692+
from employees
1693+
| eval language_name = null
1694+
| enrich languages_policy on languages
1695+
| rename language_name as languages
1696+
| eval languages = length(languages)
1697+
| enrich languages_policy on languages
1698+
| keep emp_no, language_name""", Set.of("emp_no", "language_name", "languages", "language_name.*", "languages.*", "emp_no.*"));
1699+
}
1700+
1701+
public void testEnrichAndJoinMaskingEvalWh() {
1702+
assertFieldNames("""
1703+
from employees
1704+
| eval language_name = null
1705+
| enrich languages_policy on languages
1706+
| rename language_name as languages
1707+
| eval languages = length(languages)
1708+
| enrich languages_policy on languages
1709+
| lookup join message_types_lookup on language_name
1710+
| keep emp_no, language_name""", Set.of("emp_no", "language_name", "languages", "language_name.*", "languages.*", "emp_no.*"));
1711+
}
1712+
16561713
private Set<String> fieldNames(String query, Set<String> enrichPolicyMatchFields) {
16571714
var preAnalysisResult = new EsqlSession.PreAnalysisResult(null);
16581715
return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();

0 commit comments

Comments
 (0)