Skip to content

Commit ef7cc79

Browse files
authored
ESQL: Fix wildcard DROP after LOOKUP JOIN (elastic#130448) (elastic#130695)
In EsqlSession#fieldNames, keep track of wildcard patterns in DROP separately from the references and wildcard patterns in KEEP commands. Only the latter are relevant to determine if we need to ask for all fields for the lookup index in the field caps request. (cherry picked from commit 05fc6f2) # Conflicts: # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java
1 parent 2f3d87a commit ef7cc79

File tree

5 files changed

+148
-16
lines changed

5 files changed

+148
-16
lines changed

docs/changelog/130448.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 130448
2+
summary: Fix wildcard drop after lookup join
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 129561

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,6 +1684,41 @@ max:long
16841684
8268153
16851685
;
16861686

1687+
wildcardDropAfterLookupJoin
1688+
required_capability: join_lookup_v12
1689+
required_capability: drop_with_wildcard_after_lookup_join
1690+
1691+
ROW somefield = 0, type = "Production"
1692+
| KEEP somefield, type
1693+
| LOOKUP JOIN message_types_lookup ON type
1694+
| DROP *field
1695+
;
1696+
1697+
type:keyword | message:keyword
1698+
Production | Production environment
1699+
;
1700+
1701+
1702+
wildcardDropAfterLookupJoinTwice
1703+
required_capability: join_lookup_v12
1704+
required_capability: drop_with_wildcard_after_lookup_join
1705+
1706+
ROW somefield = 0, type = "Production"
1707+
| KEEP somefield, type
1708+
| EVAL otherfield = 123, language_code = 3
1709+
| LOOKUP JOIN message_types_lookup ON type
1710+
| DROP *field
1711+
| EVAL foofield = 123
1712+
| KEEP *
1713+
| LOOKUP JOIN languages_lookup ON language_code
1714+
| DROP *ge, *field
1715+
;
1716+
1717+
type:keyword | language_code:integer | language_name:keyword
1718+
Production | 3 | Spanish
1719+
;
1720+
1721+
16871722
###############################################
16881723
# LOOKUP JOIN on date_nanos field
16891724
###############################################

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,13 @@ public enum Cap {
908908
*/
909909
DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL,
910910

911+
/**
912+
* Correctly ask for all fields from lookup indices even when there is e.g. a {@code DROP *field} after.
913+
* See <a href="https://github.com/elastic/elasticsearch/issues/129561">
914+
* ES|QL: missing columns for wildcard drop after lookup join #129561</a>
915+
*/
916+
DROP_WITH_WILDCARD_AFTER_LOOKUP_JOIN,
917+
911918
/**
912919
* During resolution (pre-analysis) we have to consider that joins can override regex extracted values
913920
* see <a href="https://github.com/elastic/elasticsearch/issues/127467"> ES|QL: pruning of JOINs leads to missing fields #127467 </a>

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -588,16 +588,20 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
588588
}
589589

590590
var referencesBuilder = AttributeSet.builder();
591-
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some
592-
// attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields
593-
// to remove if their name matches one of these wildcards.
591+
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some
592+
// attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the
593+
// wildcards.
594594
//
595-
// ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
596-
// "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for
595+
// E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
596+
// "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for
597597
//
598598
// NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder
599-
var shadowingRefsBuilder = AttributeSet.builder();
600-
var keepJoinRefsBuilder = AttributeSet.builder();
599+
// Except in KEEP and DROP.
600+
var keepRefs = AttributeSet.builder();
601+
var dropWildcardRefs = AttributeSet.builder();
602+
// fields required to request for lookup joins to work
603+
var joinRefs = AttributeSet.builder();
604+
// lookup indices where we request "*" because we may require all their fields
601605
Set<String> wildcardJoinIndices = new java.util.HashSet<>();
602606

603607
boolean[] canRemoveAliases = new boolean[] { true };
@@ -615,14 +619,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
615619
referencesBuilder.addAll(enrichRefs);
616620
} else if (p instanceof LookupJoin join) {
617621
if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) {
618-
keepJoinRefsBuilder.addAll(usingJoinType.columns());
622+
joinRefs.addAll(usingJoinType.columns());
619623
}
620-
if (shadowingRefsBuilder.isEmpty()) {
624+
if (keepRefs.isEmpty()) {
621625
// No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution
622626
wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern());
623627
} else {
624628
// Keep commands can reference the join columns with names that shadow aliases, so we block their removal
625-
keepJoinRefsBuilder.addAll(shadowingRefsBuilder);
629+
joinRefs.addAll(keepRefs);
626630
}
627631
} else {
628632
referencesBuilder.addAll(p.references());
@@ -634,10 +638,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
634638
p.forEachExpression(UnresolvedNamePattern.class, up -> {
635639
var ua = new UnresolvedAttribute(up.source(), up.name());
636640
referencesBuilder.add(ua);
637-
shadowingRefsBuilder.add(ua);
641+
if (p instanceof Keep) {
642+
keepRefs.add(ua);
643+
} else if (p instanceof Drop) {
644+
dropWildcardRefs.add(ua);
645+
} else {
646+
throw new IllegalStateException("Only KEEP and DROP should allow wildcards");
647+
}
638648
});
639649
if (p instanceof Keep) {
640-
shadowingRefsBuilder.addAll(p.references());
650+
keepRefs.addAll(p.references());
641651
}
642652
}
643653

@@ -671,13 +681,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
671681
if (fieldNames.contains(ne.name())) {
672682
return;
673683
}
674-
referencesBuilder.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefsBuilder.contains(attr)));
684+
referencesBuilder.removeIf(
685+
attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))
686+
);
675687
});
676688
}
677689
});
678690

679691
// Add JOIN ON column references afterward to avoid Alias removal
680-
referencesBuilder.addAll(keepJoinRefsBuilder);
692+
referencesBuilder.addAll(joinRefs);
681693
// If any JOIN commands need wildcard field-caps calls, persist the index names
682694
if (wildcardJoinIndices.isEmpty() == false) {
683695
result = result.withWildcardJoinIndices(wildcardJoinIndices);

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,7 +1969,7 @@ public void testDropAgainWithWildcardAfterEval() {
19691969
""", Set.of("emp_no", "emp_no.*", "*name", "*name.*"));
19701970
}
19711971

1972-
public void testDropWildcardedFields_AfterRename() {
1972+
public void testDropWildcardFieldsAfterRename() {
19731973
assertFieldNames(
19741974
"""
19751975
from employees
@@ -1982,7 +1982,30 @@ public void testDropWildcardedFields_AfterRename() {
19821982
);
19831983
}
19841984

1985-
public void testDropWildcardFields_WithLookupJoin() {
1985+
public void testDropWildcardFieldsAfterLookupJoins() {
1986+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1987+
assertFieldNames("""
1988+
FROM sample_data
1989+
| EVAL client_ip = client_ip::keyword
1990+
| LOOKUP JOIN clientips_lookup ON client_ip
1991+
| LOOKUP JOIN message_types_lookup ON message
1992+
| SORT @timestamp
1993+
| DROP *e""", Set.of("*"), Set.of());
1994+
}
1995+
1996+
public void testDropWildcardFieldsAfterLookupJoins2() {
1997+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1998+
assertFieldNames("""
1999+
FROM sample_data
2000+
| EVAL client_ip = client_ip::keyword
2001+
| LOOKUP JOIN clientips_lookup ON client_ip
2002+
| DROP *e, client_ip
2003+
| LOOKUP JOIN message_types_lookup ON message
2004+
| SORT @timestamp
2005+
| DROP *e""", Set.of("*"), Set.of());
2006+
}
2007+
2008+
public void testDropWildcardFieldsAfterLookupJoinsAndKeep() {
19862009
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
19872010
assertFieldNames(
19882011
"""
@@ -1998,6 +2021,55 @@ public void testDropWildcardFields_WithLookupJoin() {
19982021
);
19992022
}
20002023

2024+
public void testDropWildcardFieldsAfterLookupJoinKeepLookupJoin() {
2025+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
2026+
assertFieldNames(
2027+
"""
2028+
FROM sample_data
2029+
| EVAL client_ip = client_ip::keyword
2030+
| LOOKUP JOIN clientips_lookup ON client_ip
2031+
| KEEP @timestamp, *e*, client_ip
2032+
| LOOKUP JOIN message_types_lookup ON message
2033+
| SORT @timestamp
2034+
| DROP *e""",
2035+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
2036+
Set.of("message_types_lookup")
2037+
);
2038+
}
2039+
2040+
public void testDropWildcardFieldsAfterKeepAndLookupJoins() {
2041+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
2042+
assertFieldNames(
2043+
"""
2044+
FROM sample_data
2045+
| EVAL client_ip = client_ip::keyword
2046+
| KEEP @timestamp, *e*, client_ip
2047+
| LOOKUP JOIN clientips_lookup ON client_ip
2048+
| LOOKUP JOIN message_types_lookup ON message
2049+
| SORT @timestamp
2050+
| DROP *e""",
2051+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
2052+
Set.of("clientips_lookup", "message_types_lookup")
2053+
);
2054+
}
2055+
2056+
public void testDropWildcardFieldsAfterKeepAndLookupJoins2() {
2057+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
2058+
assertFieldNames(
2059+
"""
2060+
FROM sample_data
2061+
| EVAL client_ip = client_ip::keyword
2062+
| KEEP @timestamp, *e*, client_ip
2063+
| LOOKUP JOIN clientips_lookup ON client_ip
2064+
| DROP *e
2065+
| LOOKUP JOIN message_types_lookup ON message
2066+
| SORT @timestamp
2067+
| DROP *e, client_ip""",
2068+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
2069+
Set.of("clientips_lookup", "message_types_lookup")
2070+
);
2071+
}
2072+
20012073
private Set<String> fieldNames(String query, Set<String> enrichPolicyMatchFields) {
20022074
var preAnalysisResult = new EsqlSession.PreAnalysisResult(null);
20032075
return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();

0 commit comments

Comments
 (0)