Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/130448.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 130448
summary: Fix wildcard drop after lookup join
area: ES|QL
type: bug
issues:
- 129561
Original file line number Diff line number Diff line change
Expand Up @@ -1841,6 +1841,21 @@ max:long
8268153
;

wildcardDropAfterLookupJoin
required_capability: join_lookup_v12
required_capability: drop_with_wildcard_after_lookup_join

ROW somefield = 0, type = "Production"
| KEEP somefield, type
| LOOKUP JOIN message_types_lookup ON type
| DROP *field
;

type:keyword | message:keyword
Production | Production environment
;


###############################################
# LOOKUP JOIN on mixed numerical fields
###############################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1069,6 +1069,13 @@ public enum Cap {
*/
DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL,

/**
* Correctly ask for all fields from lookup indices even when there is e.g. a {@code DROP *field} after.
* See <a href="https://github.com/elastic/elasticsearch/issues/129561">
* ES|QL: missing columns for wildcard drop after lookup join #129561</a>
*/
DROP_WITH_WILDCARD_AFTER_LOOKUP_JOIN,

/**
* Support last_over_time aggregation that gets evaluated per time-series
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -627,16 +627,20 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
}

var referencesBuilder = AttributeSet.builder();
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some
// attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields
// to remove if their name matches one of these wildcards.
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some
// attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the
// wildcards.
//
// ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
// "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for
// E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
// "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for
//
// NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder
var shadowingRefsBuilder = AttributeSet.builder();
var keepJoinRefsBuilder = AttributeSet.builder();
// Except in KEEP and DROP.
var keepRefs = AttributeSet.builder();
var dropWildcardRefs = AttributeSet.builder();
// fields required to request for lookup joins to work
var joinRefs = AttributeSet.builder();
// lookup indices where we request "*" because we may require all their fields
Set<String> wildcardJoinIndices = new java.util.HashSet<>();

boolean[] canRemoveAliases = new boolean[] { true };
Expand All @@ -654,14 +658,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
referencesBuilder.addAll(enrichRefs);
} else if (p instanceof LookupJoin join) {
if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) {
keepJoinRefsBuilder.addAll(usingJoinType.columns());
joinRefs.addAll(usingJoinType.columns());
}
if (shadowingRefsBuilder.isEmpty()) {
if (keepRefs.isEmpty()) {
// No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution
wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern());
} else {
// Keep commands can reference the join columns with names that shadow aliases, so we block their removal
keepJoinRefsBuilder.addAll(shadowingRefsBuilder);
joinRefs.addAll(keepRefs);
}
} else {
referencesBuilder.addAll(p.references());
Expand All @@ -673,10 +677,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
p.forEachExpression(UnresolvedNamePattern.class, up -> {
var ua = new UnresolvedAttribute(up.source(), up.name());
referencesBuilder.add(ua);
shadowingRefsBuilder.add(ua);
if (p instanceof Keep) {
keepRefs.add(ua);
} else if (p instanceof Drop) {
dropWildcardRefs.add(ua);
} else {
throw new IllegalStateException("Only KEEP and DROP should allow wildcards");
}
});
if (p instanceof Keep) {
shadowingRefsBuilder.addAll(p.references());
keepRefs.addAll(p.references());
}
}

Expand Down Expand Up @@ -710,13 +720,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
if (fieldNames.contains(ne.name())) {
return;
}
referencesBuilder.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefsBuilder.contains(attr)));
referencesBuilder.removeIf(
attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))
);
});
}
});

// Add JOIN ON column references afterward to avoid Alias removal
referencesBuilder.addAll(keepJoinRefsBuilder);
referencesBuilder.addAll(joinRefs);
// If any JOIN commands need wildcard field-caps calls, persist the index names
if (wildcardJoinIndices.isEmpty() == false) {
result = result.withWildcardJoinIndices(wildcardJoinIndices);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1969,7 +1969,7 @@ public void testDropAgainWithWildcardAfterEval() {
""", Set.of("emp_no", "emp_no.*", "*name", "*name.*"));
}

public void testDropWildcardedFields_AfterRename() {
public void testDropWildcardFieldsAfterRename() {
assertFieldNames(
"""
from employees
Expand All @@ -1982,7 +1982,30 @@ public void testDropWildcardedFields_AfterRename() {
);
}

public void testDropWildcardFields_WithLookupJoin() {
public void testDropWildcardFieldsAfterLookupJoins() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames("""
FROM sample_data
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
| LOOKUP JOIN message_types_lookup ON message
| SORT @timestamp
| DROP *e""", Set.of("*"), Set.of());
}

public void testDropWildcardFieldsAfterLookupJoins2() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames("""
FROM sample_data
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
| DROP *e, client_ip
| LOOKUP JOIN message_types_lookup ON message
| SORT @timestamp
| DROP *e""", Set.of("*"), Set.of());
}

public void testDropWildcardFieldsAfterLookupJoinsAndKeep() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames(
"""
Expand All @@ -1998,6 +2021,55 @@ public void testDropWildcardFields_WithLookupJoin() {
);
}

public void testDropWildcardFieldsAfterLookupJoinKeepLookupJoin() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames(
"""
FROM sample_data
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
| KEEP @timestamp, *e*, client_ip
| LOOKUP JOIN message_types_lookup ON message
| SORT @timestamp
| DROP *e""",
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
Set.of("message_types_lookup")
);
}

public void testDropWildcardFieldsAfterKeepAndLookupJoins() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames(
"""
FROM sample_data
| EVAL client_ip = client_ip::keyword
| KEEP @timestamp, *e*, client_ip
| LOOKUP JOIN clientips_lookup ON client_ip
| LOOKUP JOIN message_types_lookup ON message
| SORT @timestamp
| DROP *e""",
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
Set.of("clientips_lookup", "message_types_lookup")
);
}

public void testDropWildcardFieldsAfterKeepAndLookupJoins2() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames(
"""
FROM sample_data
| EVAL client_ip = client_ip::keyword
| KEEP @timestamp, *e*, client_ip
| LOOKUP JOIN clientips_lookup ON client_ip
| DROP *e
| LOOKUP JOIN message_types_lookup ON message
| SORT @timestamp
| DROP *e, client_ip""",
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
Set.of("clientips_lookup", "message_types_lookup")
);
}

public void testForkFieldsWithKeepAfterFork() {
assertFieldNames("""
FROM test
Expand Down
Loading