Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/130448.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 130448
summary: Fix wildcard drop after lookup join
area: ES|QL
type: bug
issues:
- 129561
Original file line number Diff line number Diff line change
Expand Up @@ -1684,6 +1684,41 @@ max:long
8268153
;

wildcardDropAfterLookupJoin
required_capability: join_lookup_v12
required_capability: drop_with_wildcard_after_lookup_join

ROW somefield = 0, type = "Production"
| KEEP somefield, type
| LOOKUP JOIN message_types_lookup ON type
| DROP *field
;

type:keyword | message:keyword
Production | Production environment
;


wildcardDropAfterLookupJoinTwice
required_capability: join_lookup_v12
required_capability: drop_with_wildcard_after_lookup_join

ROW somefield = 0, type = "Production"
| KEEP somefield, type
| EVAL otherfield = 123, language_code = 3
| LOOKUP JOIN message_types_lookup ON type
| DROP *field
| EVAL foofield = 123
| KEEP *
| LOOKUP JOIN languages_lookup ON language_code
| DROP *ge, *field
;

type:keyword | language_code:integer | language_name:keyword
Production | 3 | Spanish
;


###############################################
# LOOKUP JOIN on date_nanos field
###############################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,13 @@ public enum Cap {
*/
DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL,

/**
* Correctly ask for all fields from lookup indices even when there is e.g. a {@code DROP *field} after.
* See <a href="https://github.com/elastic/elasticsearch/issues/129561">
* ES|QL: missing columns for wildcard drop after lookup join #129561</a>
*/
DROP_WITH_WILDCARD_AFTER_LOOKUP_JOIN,

/**
* During resolution (pre-analysis) we have to consider that joins can override regex extracted values
* see <a href="https://github.com/elastic/elasticsearch/issues/127467"> ES|QL: pruning of JOINs leads to missing fields #127467 </a>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -588,16 +588,20 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
}

var referencesBuilder = AttributeSet.builder();
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some
// attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields
// to remove if their name matches one of these wildcards.
// "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some
// attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the
// wildcards.
//
// ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
// "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for
// E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
// "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for
//
// NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder
var shadowingRefsBuilder = AttributeSet.builder();
var keepJoinRefsBuilder = AttributeSet.builder();
// Except in KEEP and DROP.
var keepRefs = AttributeSet.builder();
var dropWildcardRefs = AttributeSet.builder();
// fields required to request for lookup joins to work
var joinRefs = AttributeSet.builder();
// lookup indices where we request "*" because we may require all their fields
Set<String> wildcardJoinIndices = new java.util.HashSet<>();

boolean[] canRemoveAliases = new boolean[] { true };
Expand All @@ -615,14 +619,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
referencesBuilder.addAll(enrichRefs);
} else if (p instanceof LookupJoin join) {
if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) {
keepJoinRefsBuilder.addAll(usingJoinType.columns());
joinRefs.addAll(usingJoinType.columns());
}
if (shadowingRefsBuilder.isEmpty()) {
if (keepRefs.isEmpty()) {
// No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution
wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern());
} else {
// Keep commands can reference the join columns with names that shadow aliases, so we block their removal
keepJoinRefsBuilder.addAll(shadowingRefsBuilder);
joinRefs.addAll(keepRefs);
}
} else {
referencesBuilder.addAll(p.references());
Expand All @@ -634,10 +638,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
p.forEachExpression(UnresolvedNamePattern.class, up -> {
var ua = new UnresolvedAttribute(up.source(), up.name());
referencesBuilder.add(ua);
shadowingRefsBuilder.add(ua);
if (p instanceof Keep) {
keepRefs.add(ua);
} else if (p instanceof Drop) {
dropWildcardRefs.add(ua);
} else {
throw new IllegalStateException("Only KEEP and DROP should allow wildcards");
}
});
if (p instanceof Keep) {
shadowingRefsBuilder.addAll(p.references());
keepRefs.addAll(p.references());
}
}

Expand Down Expand Up @@ -671,13 +681,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
if (fieldNames.contains(ne.name())) {
return;
}
referencesBuilder.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefsBuilder.contains(attr)));
referencesBuilder.removeIf(
attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))
);
});
}
});

// Add JOIN ON column references afterward to avoid Alias removal
referencesBuilder.addAll(keepJoinRefsBuilder);
referencesBuilder.addAll(joinRefs);
// If any JOIN commands need wildcard field-caps calls, persist the index names
if (wildcardJoinIndices.isEmpty() == false) {
result = result.withWildcardJoinIndices(wildcardJoinIndices);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1969,7 +1969,7 @@ public void testDropAgainWithWildcardAfterEval() {
""", Set.of("emp_no", "emp_no.*", "*name", "*name.*"));
}

public void testDropWildcardedFields_AfterRename() {
public void testDropWildcardFieldsAfterRename() {
assertFieldNames(
"""
from employees
Expand All @@ -1982,7 +1982,30 @@ public void testDropWildcardedFields_AfterRename() {
);
}

public void testDropWildcardFields_WithLookupJoin() {
public void testDropWildcardFieldsAfterLookupJoins() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames("""
FROM sample_data
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
| LOOKUP JOIN message_types_lookup ON message
| SORT @timestamp
| DROP *e""", Set.of("*"), Set.of());
}

public void testDropWildcardFieldsAfterLookupJoins2() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames("""
FROM sample_data
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
| DROP *e, client_ip
| LOOKUP JOIN message_types_lookup ON message
| SORT @timestamp
| DROP *e""", Set.of("*"), Set.of());
}

public void testDropWildcardFieldsAfterLookupJoinsAndKeep() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames(
"""
Expand All @@ -1998,6 +2021,55 @@ public void testDropWildcardFields_WithLookupJoin() {
);
}

public void testDropWildcardFieldsAfterLookupJoinKeepLookupJoin() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames(
"""
FROM sample_data
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
| KEEP @timestamp, *e*, client_ip
| LOOKUP JOIN message_types_lookup ON message
| SORT @timestamp
| DROP *e""",
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
Set.of("message_types_lookup")
);
}

public void testDropWildcardFieldsAfterKeepAndLookupJoins() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames(
"""
FROM sample_data
| EVAL client_ip = client_ip::keyword
| KEEP @timestamp, *e*, client_ip
| LOOKUP JOIN clientips_lookup ON client_ip
| LOOKUP JOIN message_types_lookup ON message
| SORT @timestamp
| DROP *e""",
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
Set.of("clientips_lookup", "message_types_lookup")
);
}

public void testDropWildcardFieldsAfterKeepAndLookupJoins2() {
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
assertFieldNames(
"""
FROM sample_data
| EVAL client_ip = client_ip::keyword
| KEEP @timestamp, *e*, client_ip
| LOOKUP JOIN clientips_lookup ON client_ip
| DROP *e
| LOOKUP JOIN message_types_lookup ON message
| SORT @timestamp
| DROP *e, client_ip""",
Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"),
Set.of("clientips_lookup", "message_types_lookup")
);
}

private Set<String> fieldNames(String query, Set<String> enrichPolicyMatchFields) {
var preAnalysisResult = new EsqlSession.PreAnalysisResult(null);
return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();
Expand Down