Skip to content

Commit 1a3494f

Browse files
authored
ESQL: Fix wildcard DROP after LOOKUP JOIN (#130448) (#130685)
In EsqlSession#fieldNames, keep track of wildcard patterns in DROP separately from the references and wildcard patterns in KEEP commands. Only the latter are relevant to determine if we need to ask for all fields for the lookup index in the field caps request.
1 parent 84cc587 commit 1a3494f

File tree

5 files changed

+148
-16
lines changed

5 files changed

+148
-16
lines changed

docs/changelog/130448.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 130448
2+
summary: Fix wildcard drop after lookup join
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 129561

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1841,6 +1841,41 @@ max:long
18411841
8268153
18421842
;
18431843

1844+
wildcardDropAfterLookupJoin
1845+
required_capability: join_lookup_v12
1846+
required_capability: drop_with_wildcard_after_lookup_join
1847+
1848+
ROW somefield = 0, type = "Production"
1849+
| KEEP somefield, type
1850+
| LOOKUP JOIN message_types_lookup ON type
1851+
| DROP *field
1852+
;
1853+
1854+
type:keyword | message:keyword
1855+
Production | Production environment
1856+
;
1857+
1858+
1859+
wildcardDropAfterLookupJoinTwice
1860+
required_capability: join_lookup_v12
1861+
required_capability: drop_with_wildcard_after_lookup_join
1862+
1863+
ROW somefield = 0, type = "Production"
1864+
| KEEP somefield, type
1865+
| EVAL otherfield = 123, language_code = 3
1866+
| LOOKUP JOIN message_types_lookup ON type
1867+
| DROP *field
1868+
| EVAL foofield = 123
1869+
| KEEP *
1870+
| LOOKUP JOIN languages_lookup ON language_code
1871+
| DROP *ge, *field
1872+
;
1873+
1874+
type:keyword | language_code:integer | language_name:keyword
1875+
Production | 3 | Spanish
1876+
;
1877+
1878+
18441879
###############################################
18451880
# LOOKUP JOIN on mixed numerical fields
18461881
###############################################

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,13 @@ public enum Cap {
10691069
*/
10701070
DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL,
10711071

1072+
/**
1073+
* Correctly ask for all fields from lookup indices even when there is e.g. a {@code DROP *field} after.
1074+
* See <a href="https://github.com/elastic/elasticsearch/issues/129561">
1075+
* ES|QL: missing columns for wildcard drop after lookup join #129561</a>
1076+
*/
1077+
DROP_WITH_WILDCARD_AFTER_LOOKUP_JOIN,
1078+
10721079
/**
10731080
* Support last_over_time aggregation that gets evaluated per time-series
10741081
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -627,16 +627,20 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
627627
}
628628

629629
var referencesBuilder = AttributeSet.builder();
630-
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some
631-
// attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields
632-
// to remove if their name matches one of these wildcards.
630+
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some
631+
// attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the
632+
// wildcards.
633633
//
634-
// ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
635-
// "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for
634+
// E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
635+
// "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for
636636
//
637637
// NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder
638-
var shadowingRefsBuilder = AttributeSet.builder();
639-
var keepJoinRefsBuilder = AttributeSet.builder();
638+
// Except in KEEP and DROP.
639+
var keepRefs = AttributeSet.builder();
640+
var dropWildcardRefs = AttributeSet.builder();
641+
// fields required to request for lookup joins to work
642+
var joinRefs = AttributeSet.builder();
643+
// lookup indices where we request "*" because we may require all their fields
640644
Set<String> wildcardJoinIndices = new java.util.HashSet<>();
641645

642646
boolean[] canRemoveAliases = new boolean[] { true };
@@ -654,14 +658,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
654658
referencesBuilder.addAll(enrichRefs);
655659
} else if (p instanceof LookupJoin join) {
656660
if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) {
657-
keepJoinRefsBuilder.addAll(usingJoinType.columns());
661+
joinRefs.addAll(usingJoinType.columns());
658662
}
659-
if (shadowingRefsBuilder.isEmpty()) {
663+
if (keepRefs.isEmpty()) {
660664
// No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution
661665
wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern());
662666
} else {
663667
// Keep commands can reference the join columns with names that shadow aliases, so we block their removal
664-
keepJoinRefsBuilder.addAll(shadowingRefsBuilder);
668+
joinRefs.addAll(keepRefs);
665669
}
666670
} else {
667671
referencesBuilder.addAll(p.references());
@@ -673,10 +677,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
673677
p.forEachExpression(UnresolvedNamePattern.class, up -> {
674678
var ua = new UnresolvedAttribute(up.source(), up.name());
675679
referencesBuilder.add(ua);
676-
shadowingRefsBuilder.add(ua);
680+
if (p instanceof Keep) {
681+
keepRefs.add(ua);
682+
} else if (p instanceof Drop) {
683+
dropWildcardRefs.add(ua);
684+
} else {
685+
throw new IllegalStateException("Only KEEP and DROP should allow wildcards");
686+
}
677687
});
678688
if (p instanceof Keep) {
679-
shadowingRefsBuilder.addAll(p.references());
689+
keepRefs.addAll(p.references());
680690
}
681691
}
682692

@@ -710,13 +720,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
710720
if (fieldNames.contains(ne.name())) {
711721
return;
712722
}
713-
referencesBuilder.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefsBuilder.contains(attr)));
723+
referencesBuilder.removeIf(
724+
attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))
725+
);
714726
});
715727
}
716728
});
717729

718730
// Add JOIN ON column references afterward to avoid Alias removal
719-
referencesBuilder.addAll(keepJoinRefsBuilder);
731+
referencesBuilder.addAll(joinRefs);
720732
// If any JOIN commands need wildcard field-caps calls, persist the index names
721733
if (wildcardJoinIndices.isEmpty() == false) {
722734
result = result.withWildcardJoinIndices(wildcardJoinIndices);

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,7 +1969,7 @@ public void testDropAgainWithWildcardAfterEval() {
19691969
""", Set.of("emp_no", "emp_no.*", "*name", "*name.*"));
19701970
}
19711971

1972-
public void testDropWildcardedFields_AfterRename() {
1972+
public void testDropWildcardFieldsAfterRename() {
19731973
assertFieldNames(
19741974
"""
19751975
from employees
@@ -1982,7 +1982,30 @@ public void testDropWildcardedFields_AfterRename() {
19821982
);
19831983
}
19841984

1985-
public void testDropWildcardFields_WithLookupJoin() {
1985+
public void testDropWildcardFieldsAfterLookupJoins() {
1986+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1987+
assertFieldNames("""
1988+
FROM sample_data
1989+
| EVAL client_ip = client_ip::keyword
1990+
| LOOKUP JOIN clientips_lookup ON client_ip
1991+
| LOOKUP JOIN message_types_lookup ON message
1992+
| SORT @timestamp
1993+
| DROP *e""", Set.of("*"), Set.of());
1994+
}
1995+
1996+
public void testDropWildcardFieldsAfterLookupJoins2() {
1997+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1998+
assertFieldNames("""
1999+
FROM sample_data
2000+
| EVAL client_ip = client_ip::keyword
2001+
| LOOKUP JOIN clientips_lookup ON client_ip
2002+
| DROP *e, client_ip
2003+
| LOOKUP JOIN message_types_lookup ON message
2004+
| SORT @timestamp
2005+
| DROP *e""", Set.of("*"), Set.of());
2006+
}
2007+
2008+
public void testDropWildcardFieldsAfterLookupJoinsAndKeep() {
19862009
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
19872010
assertFieldNames(
19882011
"""
@@ -1998,6 +2021,55 @@ public void testDropWildcardFields_WithLookupJoin() {
19982021
);
19992022
}
20002023

2024+
public void testDropWildcardFieldsAfterLookupJoinKeepLookupJoin() {
2025+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
2026+
assertFieldNames(
2027+
"""
2028+
FROM sample_data
2029+
| EVAL client_ip = client_ip::keyword
2030+
| LOOKUP JOIN clientips_lookup ON client_ip
2031+
| KEEP @timestamp, *e*, client_ip
2032+
| LOOKUP JOIN message_types_lookup ON message
2033+
| SORT @timestamp
2034+
| DROP *e""",
2035+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
2036+
Set.of("message_types_lookup")
2037+
);
2038+
}
2039+
2040+
public void testDropWildcardFieldsAfterKeepAndLookupJoins() {
2041+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
2042+
assertFieldNames(
2043+
"""
2044+
FROM sample_data
2045+
| EVAL client_ip = client_ip::keyword
2046+
| KEEP @timestamp, *e*, client_ip
2047+
| LOOKUP JOIN clientips_lookup ON client_ip
2048+
| LOOKUP JOIN message_types_lookup ON message
2049+
| SORT @timestamp
2050+
| DROP *e""",
2051+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
2052+
Set.of("clientips_lookup", "message_types_lookup")
2053+
);
2054+
}
2055+
2056+
public void testDropWildcardFieldsAfterKeepAndLookupJoins2() {
2057+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
2058+
assertFieldNames(
2059+
"""
2060+
FROM sample_data
2061+
| EVAL client_ip = client_ip::keyword
2062+
| KEEP @timestamp, *e*, client_ip
2063+
| LOOKUP JOIN clientips_lookup ON client_ip
2064+
| DROP *e
2065+
| LOOKUP JOIN message_types_lookup ON message
2066+
| SORT @timestamp
2067+
| DROP *e, client_ip""",
2068+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
2069+
Set.of("clientips_lookup", "message_types_lookup")
2070+
);
2071+
}
2072+
20012073
public void testForkFieldsWithKeepAfterFork() {
20022074
assertFieldNames("""
20032075
FROM test

0 commit comments

Comments
 (0)