Skip to content

Commit 6288fe2

Browse files
authored
ESQL: Fix wildcard DROP after LOOKUP JOIN (#130448) (#130696)
In EsqlSession#fieldNames, keep track of wildcard patterns in DROP separately from the references and wildcard patterns in KEEP commands. Only the latter are relevant to determine if we need to ask for all fields for the lookup index in the field caps request. (cherry picked from commit 05fc6f2) # Conflicts: # x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java
1 parent 8fd629f commit 6288fe2

File tree

5 files changed

+148
-16
lines changed

5 files changed

+148
-16
lines changed

docs/changelog/130448.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 130448
2+
summary: Fix wildcard drop after lookup join
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 129561

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1754,6 +1754,41 @@ emp_no:integer | language_code:integer | language_name:keyword
17541754
10004 |null |null
17551755
;
17561756

1757+
wildcardDropAfterLookupJoin
1758+
required_capability: join_lookup_v12
1759+
required_capability: drop_with_wildcard_after_lookup_join
1760+
1761+
ROW somefield = 0, type = "Production"
1762+
| KEEP somefield, type
1763+
| LOOKUP JOIN message_types_lookup ON type
1764+
| DROP *field
1765+
;
1766+
1767+
type:keyword | message:keyword
1768+
Production | Production environment
1769+
;
1770+
1771+
1772+
wildcardDropAfterLookupJoinTwice
1773+
required_capability: join_lookup_v12
1774+
required_capability: drop_with_wildcard_after_lookup_join
1775+
1776+
ROW somefield = 0, type = "Production"
1777+
| KEEP somefield, type
1778+
| EVAL otherfield = 123, language_code = 3
1779+
| LOOKUP JOIN message_types_lookup ON type
1780+
| DROP *field
1781+
| EVAL foofield = 123
1782+
| KEEP *
1783+
| LOOKUP JOIN languages_lookup ON language_code
1784+
| DROP *ge, *field
1785+
;
1786+
1787+
type:keyword | language_code:integer | language_name:keyword
1788+
Production | 3 | Spanish
1789+
;
1790+
1791+
17571792
###############################################
17581793
# LOOKUP JOIN on mixed numerical fields
17591794
###############################################

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,13 @@ public enum Cap {
909909
*/
910910
DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL,
911911

912+
/**
913+
* Correctly ask for all fields from lookup indices even when there is e.g. a {@code DROP *field} after.
914+
* See <a href="https://github.com/elastic/elasticsearch/issues/129561">
915+
* ES|QL: missing columns for wildcard drop after lookup join #129561</a>
916+
*/
917+
DROP_WITH_WILDCARD_AFTER_LOOKUP_JOIN,
918+
912919
/**
913920
* During resolution (pre-analysis) we have to consider that joins can override regex extracted values
914921
* see <a href="https://github.com/elastic/elasticsearch/issues/127467"> ES|QL: pruning of JOINs leads to missing fields #127467</a>

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -574,16 +574,20 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
574574
}
575575

576576
var referencesBuilder = AttributeSet.builder();
577-
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some
578-
// attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields
579-
// to remove if their name matches one of these wildcards.
577+
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some
578+
// attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the
579+
// wildcards.
580580
//
581-
// ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
582-
// "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for
581+
// E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
582+
// "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for
583583
//
584584
// NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder
585-
var shadowingRefsBuilder = AttributeSet.builder();
586-
var keepJoinRefsBuilder = AttributeSet.builder();
585+
// Except in KEEP and DROP.
586+
var keepRefs = AttributeSet.builder();
587+
var dropWildcardRefs = AttributeSet.builder();
588+
// fields required to request for lookup joins to work
589+
var joinRefs = AttributeSet.builder();
590+
// lookup indices where we request "*" because we may require all their fields
587591
Set<String> wildcardJoinIndices = new java.util.HashSet<>();
588592

589593
boolean[] canRemoveAliases = new boolean[] { true };
@@ -601,14 +605,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
601605
referencesBuilder.addAll(enrichRefs);
602606
} else if (p instanceof LookupJoin join) {
603607
if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) {
604-
keepJoinRefsBuilder.addAll(usingJoinType.columns());
608+
joinRefs.addAll(usingJoinType.columns());
605609
}
606-
if (shadowingRefsBuilder.isEmpty()) {
610+
if (keepRefs.isEmpty()) {
607611
// No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution
608612
wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern());
609613
} else {
610614
// Keep commands can reference the join columns with names that shadow aliases, so we block their removal
611-
keepJoinRefsBuilder.addAll(shadowingRefsBuilder);
615+
joinRefs.addAll(keepRefs);
612616
}
613617
} else {
614618
referencesBuilder.addAll(p.references());
@@ -620,10 +624,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
620624
p.forEachExpression(UnresolvedNamePattern.class, up -> {
621625
var ua = new UnresolvedAttribute(up.source(), up.name());
622626
referencesBuilder.add(ua);
623-
shadowingRefsBuilder.add(ua);
627+
if (p instanceof Keep) {
628+
keepRefs.add(ua);
629+
} else if (p instanceof Drop) {
630+
dropWildcardRefs.add(ua);
631+
} else {
632+
throw new IllegalStateException("Only KEEP and DROP should allow wildcards");
633+
}
624634
});
625635
if (p instanceof Keep) {
626-
shadowingRefsBuilder.addAll(p.references());
636+
keepRefs.addAll(p.references());
627637
}
628638
}
629639

@@ -657,13 +667,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
657667
if (fieldNames.contains(ne.name())) {
658668
return;
659669
}
660-
referencesBuilder.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefsBuilder.contains(attr)));
670+
referencesBuilder.removeIf(
671+
attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))
672+
);
661673
});
662674
}
663675
});
664676

665677
// Add JOIN ON column references afterward to avoid Alias removal
666-
referencesBuilder.addAll(keepJoinRefsBuilder);
678+
referencesBuilder.addAll(joinRefs);
667679
// If any JOIN commands need wildcard field-caps calls, persist the index names
668680
if (wildcardJoinIndices.isEmpty() == false) {
669681
result = result.withWildcardJoinIndices(wildcardJoinIndices);

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1758,7 +1758,7 @@ public void testDropAgainWithWildcardAfterEval() {
17581758
""", Set.of("emp_no", "emp_no.*", "*name", "*name.*"));
17591759
}
17601760

1761-
public void testDropWildcardedFields_AfterRename() {
1761+
public void testDropWildcardFieldsAfterRename() {
17621762
assertFieldNames(
17631763
"""
17641764
from employees
@@ -1771,7 +1771,30 @@ public void testDropWildcardedFields_AfterRename() {
17711771
);
17721772
}
17731773

1774-
public void testDropWildcardFields_WithLookupJoin() {
1774+
public void testDropWildcardFieldsAfterLookupJoins() {
1775+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1776+
assertFieldNames("""
1777+
FROM sample_data
1778+
| EVAL client_ip = client_ip::keyword
1779+
| LOOKUP JOIN clientips_lookup ON client_ip
1780+
| LOOKUP JOIN message_types_lookup ON message
1781+
| SORT @timestamp
1782+
| DROP *e""", Set.of("*"), Set.of());
1783+
}
1784+
1785+
public void testDropWildcardFieldsAfterLookupJoins2() {
1786+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1787+
assertFieldNames("""
1788+
FROM sample_data
1789+
| EVAL client_ip = client_ip::keyword
1790+
| LOOKUP JOIN clientips_lookup ON client_ip
1791+
| DROP *e, client_ip
1792+
| LOOKUP JOIN message_types_lookup ON message
1793+
| SORT @timestamp
1794+
| DROP *e""", Set.of("*"), Set.of());
1795+
}
1796+
1797+
public void testDropWildcardFieldsAfterLookupJoinsAndKeep() {
17751798
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
17761799
assertFieldNames(
17771800
"""
@@ -1787,6 +1810,55 @@ public void testDropWildcardFields_WithLookupJoin() {
17871810
);
17881811
}
17891812

1813+
public void testDropWildcardFieldsAfterLookupJoinKeepLookupJoin() {
1814+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1815+
assertFieldNames(
1816+
"""
1817+
FROM sample_data
1818+
| EVAL client_ip = client_ip::keyword
1819+
| LOOKUP JOIN clientips_lookup ON client_ip
1820+
| KEEP @timestamp, *e*, client_ip
1821+
| LOOKUP JOIN message_types_lookup ON message
1822+
| SORT @timestamp
1823+
| DROP *e""",
1824+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
1825+
Set.of("message_types_lookup")
1826+
);
1827+
}
1828+
1829+
public void testDropWildcardFieldsAfterKeepAndLookupJoins() {
1830+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1831+
assertFieldNames(
1832+
"""
1833+
FROM sample_data
1834+
| EVAL client_ip = client_ip::keyword
1835+
| KEEP @timestamp, *e*, client_ip
1836+
| LOOKUP JOIN clientips_lookup ON client_ip
1837+
| LOOKUP JOIN message_types_lookup ON message
1838+
| SORT @timestamp
1839+
| DROP *e""",
1840+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
1841+
Set.of("clientips_lookup", "message_types_lookup")
1842+
);
1843+
}
1844+
1845+
public void testDropWildcardFieldsAfterKeepAndLookupJoins2() {
1846+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1847+
assertFieldNames(
1848+
"""
1849+
FROM sample_data
1850+
| EVAL client_ip = client_ip::keyword
1851+
| KEEP @timestamp, *e*, client_ip
1852+
| LOOKUP JOIN clientips_lookup ON client_ip
1853+
| DROP *e
1854+
| LOOKUP JOIN message_types_lookup ON message
1855+
| SORT @timestamp
1856+
| DROP *e, client_ip""",
1857+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
1858+
Set.of("clientips_lookup", "message_types_lookup")
1859+
);
1860+
}
1861+
17901862
private Set<String> fieldNames(String query, Set<String> enrichPolicyMatchFields) {
17911863
var preAnalysisResult = new EsqlSession.PreAnalysisResult(null);
17921864
return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();

0 commit comments

Comments
 (0)