diff --git a/docs/changelog/130448.yaml b/docs/changelog/130448.yaml new file mode 100644 index 0000000000000..d99ef37b0e071 --- /dev/null +++ b/docs/changelog/130448.yaml @@ -0,0 +1,6 @@ +pr: 130448 +summary: Fix wildcard drop after lookup join +area: ES|QL +type: bug +issues: + - 129561 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 6254b42e176fa..e82c7be4cede2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -1841,6 +1841,41 @@ max:long 8268153 ; +wildcardDropAfterLookupJoin +required_capability: join_lookup_v12 +required_capability: drop_with_wildcard_after_lookup_join + +ROW somefield = 0, type = "Production" +| KEEP somefield, type +| LOOKUP JOIN message_types_lookup ON type +| DROP *field +; + +type:keyword | message:keyword +Production | Production environment +; + + +wildcardDropAfterLookupJoinTwice +required_capability: join_lookup_v12 +required_capability: drop_with_wildcard_after_lookup_join + +ROW somefield = 0, type = "Production" +| KEEP somefield, type +| EVAL otherfield = 123, language_code = 3 +| LOOKUP JOIN message_types_lookup ON type +| DROP *field +| EVAL foofield = 123 +| KEEP * +| LOOKUP JOIN languages_lookup ON language_code +| DROP *ge, *field +; + +type:keyword | language_code:integer | language_name:keyword +Production | 3 | Spanish +; + + ############################################### # LOOKUP JOIN on mixed numerical fields ############################################### diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 6ea22bff910ee..66b384af91c4a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1069,6 +1069,13 @@ public enum Cap { */ DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL, + /** + * Correctly ask for all fields from lookup indices even when there is e.g. a {@code DROP *field} after. + * See + * ES|QL: missing columns for wildcard drop after lookup join #129561 + */ + DROP_WITH_WILDCARD_AFTER_LOOKUP_JOIN, + /** * Support last_over_time aggregation that gets evaluated per time-series */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 4ff65f59bbd72..b6dd0c40f3481 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -627,16 +627,20 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy } var referencesBuilder = AttributeSet.builder(); - // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some - // attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields - // to remove if their name matches one of these wildcards. + // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some + // attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the + // wildcards. // - // ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for - // "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for + // E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for + // "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for // // NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder - var shadowingRefsBuilder = AttributeSet.builder(); - var keepJoinRefsBuilder = AttributeSet.builder(); + // Except in KEEP and DROP. + var keepRefs = AttributeSet.builder(); + var dropWildcardRefs = AttributeSet.builder(); + // fields required to request for lookup joins to work + var joinRefs = AttributeSet.builder(); + // lookup indices where we request "*" because we may require all their fields Set wildcardJoinIndices = new java.util.HashSet<>(); boolean[] canRemoveAliases = new boolean[] { true }; @@ -654,14 +658,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy referencesBuilder.addAll(enrichRefs); } else if (p instanceof LookupJoin join) { if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) { - keepJoinRefsBuilder.addAll(usingJoinType.columns()); + joinRefs.addAll(usingJoinType.columns()); } - if (shadowingRefsBuilder.isEmpty()) { + if (keepRefs.isEmpty()) { // No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern()); } else { // Keep commands can reference the join columns with names that shadow aliases, so we block their removal - keepJoinRefsBuilder.addAll(shadowingRefsBuilder); + joinRefs.addAll(keepRefs); } } else { referencesBuilder.addAll(p.references()); @@ -673,10 +677,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy p.forEachExpression(UnresolvedNamePattern.class, up -> { var ua = new UnresolvedAttribute(up.source(), up.name()); referencesBuilder.add(ua); - shadowingRefsBuilder.add(ua); + if (p instanceof Keep) { + keepRefs.add(ua); + } else if (p instanceof Drop) { + dropWildcardRefs.add(ua); + } else { + throw new IllegalStateException("Only KEEP and DROP should allow wildcards"); + } }); if (p instanceof Keep) { - shadowingRefsBuilder.addAll(p.references()); + keepRefs.addAll(p.references()); } } @@ -710,13 +720,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy if (fieldNames.contains(ne.name())) { return; } - referencesBuilder.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefsBuilder.contains(attr))); + referencesBuilder.removeIf( + attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr)) + ); }); } }); // Add JOIN ON column references afterward to avoid Alias removal - referencesBuilder.addAll(keepJoinRefsBuilder); + referencesBuilder.addAll(joinRefs); // If any JOIN commands need wildcard field-caps calls, persist the index names if (wildcardJoinIndices.isEmpty() == false) { result = result.withWildcardJoinIndices(wildcardJoinIndices); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 4264fdacc5d7b..ae66488f419e1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -1969,7 +1969,7 @@ public void testDropAgainWithWildcardAfterEval() { """, Set.of("emp_no", "emp_no.*", "*name", "*name.*")); } - public void testDropWildcardedFields_AfterRename() { + public void testDropWildcardFieldsAfterRename() { assertFieldNames( """ from employees @@ -1982,7 +1982,30 @@ public void testDropWildcardedFields_AfterRename() { ); } - public void testDropWildcardFields_WithLookupJoin() { + public void testDropWildcardFieldsAfterLookupJoins() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames(""" + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", Set.of("*"), Set.of()); + } + + public void testDropWildcardFieldsAfterLookupJoins2() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames(""" + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | DROP *e, client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", Set.of("*"), Set.of()); + } + + public void testDropWildcardFieldsAfterLookupJoinsAndKeep() { assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); assertFieldNames( """ @@ -1998,6 +2021,55 @@ public void testDropWildcardFields_WithLookupJoin() { ); } + public void testDropWildcardFieldsAfterLookupJoinKeepLookupJoin() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("message_types_lookup") + ); + } + + public void testDropWildcardFieldsAfterKeepAndLookupJoins() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("clientips_lookup", "message_types_lookup") + ); + } + + public void testDropWildcardFieldsAfterKeepAndLookupJoins2() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN clientips_lookup ON client_ip + | DROP *e + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e, client_ip""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("clientips_lookup", "message_types_lookup") + ); + } + public void testForkFieldsWithKeepAfterFork() { assertFieldNames(""" FROM test