diff --git a/docs/changelog/130448.yaml b/docs/changelog/130448.yaml new file mode 100644 index 0000000000000..d99ef37b0e071 --- /dev/null +++ b/docs/changelog/130448.yaml @@ -0,0 +1,6 @@ +pr: 130448 +summary: Fix wildcard drop after lookup join +area: ES|QL +type: bug +issues: + - 129561 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index dba2f653b1d01..cccd7187a15eb 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -1682,3 +1682,37 @@ max:long 3450233 8268153 ; + +wildcardDropAfterLookupJoin +required_capability: join_lookup_v12 +required_capability: drop_with_wildcard_after_lookup_join + +ROW somefield = 0, type = "Production" +| KEEP somefield, type +| LOOKUP JOIN message_types_lookup ON type +| DROP *field +; + +type:keyword | message:keyword +Production | Production environment +; + + +wildcardDropAfterLookupJoinTwice +required_capability: join_lookup_v12 +required_capability: drop_with_wildcard_after_lookup_join + +ROW somefield = 0, type = "Production" +| KEEP somefield, type +| EVAL otherfield = 123, language_code = 3 +| LOOKUP JOIN message_types_lookup ON type +| DROP *field +| EVAL foofield = 123 +| KEEP * +| LOOKUP JOIN languages_lookup ON language_code +| DROP *ge, *field +; + +type:keyword | language_code:integer | language_name:keyword +Production | 3 | Spanish +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index bfbea6fddf0da..b9503e2bbb2d3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -742,6 +742,13 @@ public enum Cap { */ DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL, + /** + * Correctly ask for all fields from lookup indices even when there is e.g. a {@code DROP *field} after. + * See + * ES|QL: missing columns for wildcard drop after lookup join #129561 + */ + DROP_WITH_WILDCARD_AFTER_LOOKUP_JOIN, + /** * During resolution (pre-analysis) we have to consider that joins can override regex extracted values * see ES|QL: pruning of JOINs leads to missing fields #127467 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 0dffdbc14b838..cd4e6599b5d78 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -578,16 +578,20 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy } AttributeSet references = new AttributeSet(); - // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some - // attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields - // to remove if their name matches one of these wildcards. + // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some + // attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the + // wildcards. // - // ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for - // "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for + // E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for + // "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for // // NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder - var shadowingRefs = new AttributeSet(); - var keepJoinRefs = new AttributeSet(); + // Except in KEEP and DROP. + var keepRefs = new AttributeSet(); + var dropWildcardRefs = new AttributeSet(); + // fields required to request for lookup joins to work + var joinRefs = new AttributeSet(); + // lookup indices where we request "*" because we may require all their fields Set wildcardJoinIndices = new java.util.HashSet<>(); boolean[] canRemoveAliases = new boolean[] { true }; @@ -605,14 +609,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy references.addAll(enrichRefs); } else if (p instanceof LookupJoin join) { if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) { - keepJoinRefs.addAll(usingJoinType.columns()); + joinRefs.addAll(usingJoinType.columns()); } - if (shadowingRefs.isEmpty()) { + if (keepRefs.isEmpty()) { // No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern()); } else { // Keep commands can reference the join columns with names that shadow aliases, so we block their removal - keepJoinRefs.addAll(shadowingRefs); + joinRefs.addAll(keepRefs); } } else { references.addAll(p.references()); @@ -624,10 +628,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy p.forEachExpression(UnresolvedNamePattern.class, up -> { var ua = new UnresolvedAttribute(up.source(), up.name()); references.add(ua); - shadowingRefs.add(ua); + if (p instanceof Keep) { + keepRefs.add(ua); + } else if (p instanceof Drop) { + dropWildcardRefs.add(ua); + } else { + throw new IllegalStateException("Only KEEP and DROP should allow wildcards"); + } }); if (p instanceof Keep) { - shadowingRefs.addAll(p.references()); + keepRefs.addAll(p.references()); } } @@ -661,13 +671,13 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy if (fieldNames.contains(ne.name())) { return; } - references.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefs.contains(attr))); + references.removeIf(attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))); }); } }); // Add JOIN ON column references afterward to avoid Alias removal - references.addAll(keepJoinRefs); + references.addAll(joinRefs); // If any JOIN commands need wildcard field-caps calls, persist the index names if (wildcardJoinIndices.isEmpty() == false) { result = result.withWildcardJoinIndices(wildcardJoinIndices); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 17f9c48bebb8b..a10bc7bd73116 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -1758,7 +1758,7 @@ public void testDropAgainWithWildcardAfterEval() { """, Set.of("emp_no", "emp_no.*", "*name", "*name.*")); } - public void testDropWildcardedFields_AfterRename() { + public void testDropWildcardFieldsAfterRename() { assertFieldNames( """ from employees @@ -1771,7 +1771,30 @@ public void testDropWildcardedFields_AfterRename() { ); } - public void testDropWildcardFields_WithLookupJoin() { + public void testDropWildcardFieldsAfterLookupJoins() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames(""" + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", Set.of("*"), Set.of()); + } + + public void testDropWildcardFieldsAfterLookupJoins2() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames(""" + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | DROP *e, client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", Set.of("*"), Set.of()); + } + + public void testDropWildcardFieldsAfterLookupJoinsAndKeep() { assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); assertFieldNames( """ @@ -1787,6 +1810,55 @@ public void testDropWildcardFields_WithLookupJoin() { ); } + public void testDropWildcardFieldsAfterLookupJoinKeepLookupJoin() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("message_types_lookup") + ); + } + + public void testDropWildcardFieldsAfterKeepAndLookupJoins() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("clientips_lookup", "message_types_lookup") + ); + } + + public void testDropWildcardFieldsAfterKeepAndLookupJoins2() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN clientips_lookup ON client_ip + | DROP *e + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e, client_ip""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("clientips_lookup", "message_types_lookup") + ); + } + private Set fieldNames(String query, Set enrichPolicyMatchFields) { var preAnalysisResult = new EsqlSession.PreAnalysisResult(null); return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();