From f64d249d373ab10b46c0d3e91e7ffa3ed24796d2 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 2 Jul 2025 12:58:57 +0200 Subject: [PATCH 1/5] Fix wildcard drop after lookup join In EsqlSession#fieldNames, keep track of wildcard patterns in DROP separately from the references and wildcard patterns in KEEP commands. If the latter are present after a LOOKUP JOIN, we'll ask fieldcaps for specific fields of the lookup index, whereas `DROP somefield*` should not trigger asking for specific fields by itself - if there's no downstream KEEP, we still need to ask for `*`. --- .../xpack/esql/session/EsqlSession.java | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 4ff65f59bbd72..b6dd0c40f3481 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -627,16 +627,20 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy } var referencesBuilder = AttributeSet.builder(); - // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some - // attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields - // to remove if their name matches one of these wildcards. + // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some + // attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the + // wildcards. // - // ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for - // "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for + // E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for + // "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for // // NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder - var shadowingRefsBuilder = AttributeSet.builder(); - var keepJoinRefsBuilder = AttributeSet.builder(); + // Except in KEEP and DROP. + var keepRefs = AttributeSet.builder(); + var dropWildcardRefs = AttributeSet.builder(); + // fields required to request for lookup joins to work + var joinRefs = AttributeSet.builder(); + // lookup indices where we request "*" because we may require all their fields Set wildcardJoinIndices = new java.util.HashSet<>(); boolean[] canRemoveAliases = new boolean[] { true }; @@ -654,14 +658,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy referencesBuilder.addAll(enrichRefs); } else if (p instanceof LookupJoin join) { if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) { - keepJoinRefsBuilder.addAll(usingJoinType.columns()); + joinRefs.addAll(usingJoinType.columns()); } - if (shadowingRefsBuilder.isEmpty()) { + if (keepRefs.isEmpty()) { // No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern()); } else { // Keep commands can reference the join columns with names that shadow aliases, so we block their removal - keepJoinRefsBuilder.addAll(shadowingRefsBuilder); + joinRefs.addAll(keepRefs); } } else { referencesBuilder.addAll(p.references()); @@ -673,10 +677,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy p.forEachExpression(UnresolvedNamePattern.class, up -> { var ua = new UnresolvedAttribute(up.source(), up.name()); referencesBuilder.add(ua); - shadowingRefsBuilder.add(ua); + if (p instanceof Keep) { + keepRefs.add(ua); + } else if (p instanceof Drop) { + dropWildcardRefs.add(ua); + } else { + throw new IllegalStateException("Only KEEP and DROP should allow wildcards"); + } }); if (p instanceof Keep) { - shadowingRefsBuilder.addAll(p.references()); + keepRefs.addAll(p.references()); } } @@ -710,13 +720,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy if (fieldNames.contains(ne.name())) { return; } - referencesBuilder.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefsBuilder.contains(attr))); + referencesBuilder.removeIf( + attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr)) + ); }); } }); // Add JOIN ON column references afterward to avoid Alias removal - referencesBuilder.addAll(keepJoinRefsBuilder); + referencesBuilder.addAll(joinRefs); // If any JOIN commands need wildcard field-caps calls, persist the index names if (wildcardJoinIndices.isEmpty() == false) { result = result.withWildcardJoinIndices(wildcardJoinIndices); From 0a48e02787a0e0e8668981c5175f776a5498fde4 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 2 Jul 2025 13:12:18 +0200 Subject: [PATCH 2/5] Update docs/changelog/130448.yaml --- docs/changelog/130448.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/130448.yaml diff --git a/docs/changelog/130448.yaml b/docs/changelog/130448.yaml new file mode 100644 index 0000000000000..d99ef37b0e071 --- /dev/null +++ b/docs/changelog/130448.yaml @@ -0,0 +1,6 @@ +pr: 130448 +summary: Fix wildcard drop after lookup join +area: ES|QL +type: bug +issues: + - 129561 From 547491f878b53a675a27cf31edbd75e7ba77cb18 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 2 Jul 2025 13:33:31 +0200 Subject: [PATCH 3/5] Add reproducer as csv test --- .../src/main/resources/lookup-join.csv-spec | 15 +++++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 7 +++++++ 2 files changed, 22 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 6254b42e176fa..9dcebf3c4aa3c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -1841,6 +1841,21 @@ max:long 8268153 ; +wildcardDropAfterLookupJoin +required_capability: join_lookup_v12 +required_capability: drop_with_wildcard_after_lookup_join + +ROW somefield = 0, type = "Production" +| KEEP somefield, type +| LOOKUP JOIN message_types_lookup ON type +| DROP *field +; + +type:keyword | message:keyword +Production | Production environment +; + + ############################################### # LOOKUP JOIN on mixed numerical fields ############################################### diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 9a8b71e8e5eea..8a2b363b7cdf9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1069,6 +1069,13 @@ public enum Cap { */ DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL, + /** + * Correctly ask for all fields from lookup indices even when there is e.g. a {@code DROP *field} after. + * See + * ES|QL: missing columns for wildcard drop after lookup join #129561 + */ + DROP_WITH_WILDCARD_AFTER_LOOKUP_JOIN, + /** * Support last_over_time aggregation that gets evaluated per time-series */ From ff937880f7c7f4329400a48c8b3e160f95d8cecd Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 3 Jul 2025 13:36:34 +0200 Subject: [PATCH 4/5] Add unit tests --- .../session/IndexResolverFieldNamesTests.java | 76 ++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 4264fdacc5d7b..ae66488f419e1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -1969,7 +1969,7 @@ public void testDropAgainWithWildcardAfterEval() { """, Set.of("emp_no", "emp_no.*", "*name", "*name.*")); } - public void testDropWildcardedFields_AfterRename() { + public void testDropWildcardFieldsAfterRename() { assertFieldNames( """ from employees @@ -1982,7 +1982,30 @@ public void testDropWildcardedFields_AfterRename() { ); } - public void testDropWildcardFields_WithLookupJoin() { + public void testDropWildcardFieldsAfterLookupJoins() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames(""" + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", Set.of("*"), Set.of()); + } + + public void testDropWildcardFieldsAfterLookupJoins2() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames(""" + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | DROP *e, client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", Set.of("*"), Set.of()); + } + + public void testDropWildcardFieldsAfterLookupJoinsAndKeep() { assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); assertFieldNames( """ @@ -1998,6 +2021,55 @@ public void testDropWildcardFields_WithLookupJoin() { ); } + public void testDropWildcardFieldsAfterLookupJoinKeepLookupJoin() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("message_types_lookup") + ); + } + + public void testDropWildcardFieldsAfterKeepAndLookupJoins() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("clientips_lookup", "message_types_lookup") + ); + } + + public void testDropWildcardFieldsAfterKeepAndLookupJoins2() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN clientips_lookup ON client_ip + | DROP *e + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e, client_ip""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("clientips_lookup", "message_types_lookup") + ); + } + public void testForkFieldsWithKeepAfterFork() { assertFieldNames(""" FROM test From da4db6e3151ad464e01fc189493bccac3ead75fd Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Fri, 4 Jul 2025 18:13:15 +0200 Subject: [PATCH 5/5] Add one more test --- .../src/main/resources/lookup-join.csv-spec | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 9dcebf3c4aa3c..e82c7be4cede2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -1856,6 +1856,26 @@ Production | Production environment ; +wildcardDropAfterLookupJoinTwice +required_capability: join_lookup_v12 +required_capability: drop_with_wildcard_after_lookup_join + +ROW somefield = 0, type = "Production" +| KEEP somefield, type +| EVAL otherfield = 123, language_code = 3 +| LOOKUP JOIN message_types_lookup ON type +| DROP *field +| EVAL foofield = 123 +| KEEP * +| LOOKUP JOIN languages_lookup ON language_code +| DROP *ge, *field +; + +type:keyword | language_code:integer | language_name:keyword +Production | 3 | Spanish +; + + ############################################### # LOOKUP JOIN on mixed numerical fields ###############################################