Skip to content

Commit 8654bb0

Browse files
alex-spieselasticsearchmachine
andauthored
[8.18] ESQL: Fix wildcard DROP after LOOKUP JOIN (#130448) (#130698)
* ESQL: Fix wildcard DROP after LOOKUP JOIN (#130448) In EsqlSession#fieldNames, keep track of wildcard patterns in DROP separately from the references and wildcard patterns in KEEP commands. Only the latter are relevant to determine if we need to ask for all fields for the lookup index in the field caps request. (cherry picked from commit 05fc6f2) # Conflicts: # x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java * [CI] Auto commit changes from spotless --------- Co-authored-by: elasticsearchmachine <[email protected]>
1 parent 838984b commit 8654bb0

File tree

5 files changed

+145
-16
lines changed

5 files changed

+145
-16
lines changed

docs/changelog/130448.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 130448
2+
summary: Fix wildcard drop after lookup join
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 129561

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1682,3 +1682,37 @@ max:long
16821682
3450233
16831683
8268153
16841684
;
1685+
1686+
wildcardDropAfterLookupJoin
1687+
required_capability: join_lookup_v12
1688+
required_capability: drop_with_wildcard_after_lookup_join
1689+
1690+
ROW somefield = 0, type = "Production"
1691+
| KEEP somefield, type
1692+
| LOOKUP JOIN message_types_lookup ON type
1693+
| DROP *field
1694+
;
1695+
1696+
type:keyword | message:keyword
1697+
Production | Production environment
1698+
;
1699+
1700+
1701+
wildcardDropAfterLookupJoinTwice
1702+
required_capability: join_lookup_v12
1703+
required_capability: drop_with_wildcard_after_lookup_join
1704+
1705+
ROW somefield = 0, type = "Production"
1706+
| KEEP somefield, type
1707+
| EVAL otherfield = 123, language_code = 3
1708+
| LOOKUP JOIN message_types_lookup ON type
1709+
| DROP *field
1710+
| EVAL foofield = 123
1711+
| KEEP *
1712+
| LOOKUP JOIN languages_lookup ON language_code
1713+
| DROP *ge, *field
1714+
;
1715+
1716+
type:keyword | language_code:integer | language_name:keyword
1717+
Production | 3 | Spanish
1718+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,13 @@ public enum Cap {
742742
*/
743743
DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL,
744744

745+
/**
746+
* Correctly ask for all fields from lookup indices even when there is e.g. a {@code DROP *field} after.
747+
* See <a href="https://github.com/elastic/elasticsearch/issues/129561">
748+
* ES|QL: missing columns for wildcard drop after lookup join #129561</a>
749+
*/
750+
DROP_WITH_WILDCARD_AFTER_LOOKUP_JOIN,
751+
745752
/**
746753
* During resolution (pre-analysis) we have to consider that joins can override regex extracted values
747754
* see <a href="https://github.com/elastic/elasticsearch/issues/127467"> ES|QL: pruning of JOINs leads to missing fields #127467</a>

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -578,16 +578,20 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
578578
}
579579

580580
AttributeSet references = new AttributeSet();
581-
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some
582-
// attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields
583-
// to remove if their name matches one of these wildcards.
581+
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some
582+
// attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the
583+
// wildcards.
584584
//
585-
// ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
586-
// "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for
585+
// E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
586+
// "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for
587587
//
588588
// NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder
589-
var shadowingRefs = new AttributeSet();
590-
var keepJoinRefs = new AttributeSet();
589+
// Except in KEEP and DROP.
590+
var keepRefs = new AttributeSet();
591+
var dropWildcardRefs = new AttributeSet();
592+
// fields required to request for lookup joins to work
593+
var joinRefs = new AttributeSet();
594+
// lookup indices where we request "*" because we may require all their fields
591595
Set<String> wildcardJoinIndices = new java.util.HashSet<>();
592596

593597
boolean[] canRemoveAliases = new boolean[] { true };
@@ -605,14 +609,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
605609
references.addAll(enrichRefs);
606610
} else if (p instanceof LookupJoin join) {
607611
if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) {
608-
keepJoinRefs.addAll(usingJoinType.columns());
612+
joinRefs.addAll(usingJoinType.columns());
609613
}
610-
if (shadowingRefs.isEmpty()) {
614+
if (keepRefs.isEmpty()) {
611615
// No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution
612616
wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern());
613617
} else {
614618
// Keep commands can reference the join columns with names that shadow aliases, so we block their removal
615-
keepJoinRefs.addAll(shadowingRefs);
619+
joinRefs.addAll(keepRefs);
616620
}
617621
} else {
618622
references.addAll(p.references());
@@ -624,10 +628,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
624628
p.forEachExpression(UnresolvedNamePattern.class, up -> {
625629
var ua = new UnresolvedAttribute(up.source(), up.name());
626630
references.add(ua);
627-
shadowingRefs.add(ua);
631+
if (p instanceof Keep) {
632+
keepRefs.add(ua);
633+
} else if (p instanceof Drop) {
634+
dropWildcardRefs.add(ua);
635+
} else {
636+
throw new IllegalStateException("Only KEEP and DROP should allow wildcards");
637+
}
628638
});
629639
if (p instanceof Keep) {
630-
shadowingRefs.addAll(p.references());
640+
keepRefs.addAll(p.references());
631641
}
632642
}
633643

@@ -661,13 +671,13 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
661671
if (fieldNames.contains(ne.name())) {
662672
return;
663673
}
664-
references.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefs.contains(attr)));
674+
references.removeIf(attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr)));
665675
});
666676
}
667677
});
668678

669679
// Add JOIN ON column references afterward to avoid Alias removal
670-
references.addAll(keepJoinRefs);
680+
references.addAll(joinRefs);
671681
// If any JOIN commands need wildcard field-caps calls, persist the index names
672682
if (wildcardJoinIndices.isEmpty() == false) {
673683
result = result.withWildcardJoinIndices(wildcardJoinIndices);

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1758,7 +1758,7 @@ public void testDropAgainWithWildcardAfterEval() {
17581758
""", Set.of("emp_no", "emp_no.*", "*name", "*name.*"));
17591759
}
17601760

1761-
public void testDropWildcardedFields_AfterRename() {
1761+
public void testDropWildcardFieldsAfterRename() {
17621762
assertFieldNames(
17631763
"""
17641764
from employees
@@ -1771,7 +1771,30 @@ public void testDropWildcardedFields_AfterRename() {
17711771
);
17721772
}
17731773

1774-
public void testDropWildcardFields_WithLookupJoin() {
1774+
public void testDropWildcardFieldsAfterLookupJoins() {
1775+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1776+
assertFieldNames("""
1777+
FROM sample_data
1778+
| EVAL client_ip = client_ip::keyword
1779+
| LOOKUP JOIN clientips_lookup ON client_ip
1780+
| LOOKUP JOIN message_types_lookup ON message
1781+
| SORT @timestamp
1782+
| DROP *e""", Set.of("*"), Set.of());
1783+
}
1784+
1785+
public void testDropWildcardFieldsAfterLookupJoins2() {
1786+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1787+
assertFieldNames("""
1788+
FROM sample_data
1789+
| EVAL client_ip = client_ip::keyword
1790+
| LOOKUP JOIN clientips_lookup ON client_ip
1791+
| DROP *e, client_ip
1792+
| LOOKUP JOIN message_types_lookup ON message
1793+
| SORT @timestamp
1794+
| DROP *e""", Set.of("*"), Set.of());
1795+
}
1796+
1797+
public void testDropWildcardFieldsAfterLookupJoinsAndKeep() {
17751798
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
17761799
assertFieldNames(
17771800
"""
@@ -1787,6 +1810,55 @@ public void testDropWildcardFields_WithLookupJoin() {
17871810
);
17881811
}
17891812

1813+
public void testDropWildcardFieldsAfterLookupJoinKeepLookupJoin() {
1814+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1815+
assertFieldNames(
1816+
"""
1817+
FROM sample_data
1818+
| EVAL client_ip = client_ip::keyword
1819+
| LOOKUP JOIN clientips_lookup ON client_ip
1820+
| KEEP @timestamp, *e*, client_ip
1821+
| LOOKUP JOIN message_types_lookup ON message
1822+
| SORT @timestamp
1823+
| DROP *e""",
1824+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
1825+
Set.of("message_types_lookup")
1826+
);
1827+
}
1828+
1829+
public void testDropWildcardFieldsAfterKeepAndLookupJoins() {
1830+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1831+
assertFieldNames(
1832+
"""
1833+
FROM sample_data
1834+
| EVAL client_ip = client_ip::keyword
1835+
| KEEP @timestamp, *e*, client_ip
1836+
| LOOKUP JOIN clientips_lookup ON client_ip
1837+
| LOOKUP JOIN message_types_lookup ON message
1838+
| SORT @timestamp
1839+
| DROP *e""",
1840+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
1841+
Set.of("clientips_lookup", "message_types_lookup")
1842+
);
1843+
}
1844+
1845+
public void testDropWildcardFieldsAfterKeepAndLookupJoins2() {
1846+
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
1847+
assertFieldNames(
1848+
"""
1849+
FROM sample_data
1850+
| EVAL client_ip = client_ip::keyword
1851+
| KEEP @timestamp, *e*, client_ip
1852+
| LOOKUP JOIN clientips_lookup ON client_ip
1853+
| DROP *e
1854+
| LOOKUP JOIN message_types_lookup ON message
1855+
| SORT @timestamp
1856+
| DROP *e, client_ip""",
1857+
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
1858+
Set.of("clientips_lookup", "message_types_lookup")
1859+
);
1860+
}
1861+
17901862
private Set<String> fieldNames(String query, Set<String> enrichPolicyMatchFields) {
17911863
var preAnalysisResult = new EsqlSession.PreAnalysisResult(null);
17921864
return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();

0 commit comments

Comments
 (0)