From a0539f61d3e0b23d7660cf5b5f8273d5163c7582 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 7 Jul 2025 09:11:56 +0200 Subject: [PATCH] ESQL: Fix wildcard DROP after LOOKUP JOIN (#130448) In EsqlSession#fieldNames, keep track of wildcard patterns in DROP separately from the references and wildcard patterns in KEEP commands. Only the latter are relevant to determine if we need to ask for all fields for the lookup index in the field caps request. --- docs/changelog/130448.yaml | 6 ++ .../src/main/resources/lookup-join.csv-spec | 35 +++++++++ .../xpack/esql/action/EsqlCapabilities.java | 7 ++ .../xpack/esql/session/EsqlSession.java | 40 ++++++---- .../session/IndexResolverFieldNamesTests.java | 76 ++++++++++++++++++- 5 files changed, 148 insertions(+), 16 deletions(-) create mode 100644 docs/changelog/130448.yaml diff --git a/docs/changelog/130448.yaml b/docs/changelog/130448.yaml new file mode 100644 index 0000000000000..d99ef37b0e071 --- /dev/null +++ b/docs/changelog/130448.yaml @@ -0,0 +1,6 @@ +pr: 130448 +summary: Fix wildcard drop after lookup join +area: ES|QL +type: bug +issues: + - 129561 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 6254b42e176fa..e82c7be4cede2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -1841,6 +1841,41 @@ max:long 8268153 ; +wildcardDropAfterLookupJoin +required_capability: join_lookup_v12 +required_capability: drop_with_wildcard_after_lookup_join + +ROW somefield = 0, type = "Production" +| KEEP somefield, type +| LOOKUP JOIN message_types_lookup ON type +| DROP *field +; + +type:keyword | message:keyword +Production | Production environment +; + + +wildcardDropAfterLookupJoinTwice +required_capability: join_lookup_v12 +required_capability: drop_with_wildcard_after_lookup_join + +ROW somefield = 0, type = "Production" +| KEEP somefield, type +| EVAL otherfield = 123, language_code = 3 +| LOOKUP JOIN message_types_lookup ON type +| DROP *field +| EVAL foofield = 123 +| KEEP * +| LOOKUP JOIN languages_lookup ON language_code +| DROP *ge, *field +; + +type:keyword | language_code:integer | language_name:keyword +Production | 3 | Spanish +; + + ############################################### # LOOKUP JOIN on mixed numerical fields ############################################### diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index f80df51ba21d1..8b1320a9a5b21 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1069,6 +1069,13 @@ public enum Cap { */ DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL, + /** + * Correctly ask for all fields from lookup indices even when there is e.g. a {@code DROP *field} after. + * See + * ES|QL: missing columns for wildcard drop after lookup join #129561 + */ + DROP_WITH_WILDCARD_AFTER_LOOKUP_JOIN, + /** * Support last_over_time aggregation that gets evaluated per time-series */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 4ff65f59bbd72..b6dd0c40f3481 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -627,16 +627,20 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy } var referencesBuilder = AttributeSet.builder(); - // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some - // attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields - // to remove if their name matches one of these wildcards. + // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some + // attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the + // wildcards. // - // ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for - // "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for + // E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for + // "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for // // NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder - var shadowingRefsBuilder = AttributeSet.builder(); - var keepJoinRefsBuilder = AttributeSet.builder(); + // Except in KEEP and DROP. + var keepRefs = AttributeSet.builder(); + var dropWildcardRefs = AttributeSet.builder(); + // fields required to request for lookup joins to work + var joinRefs = AttributeSet.builder(); + // lookup indices where we request "*" because we may require all their fields Set wildcardJoinIndices = new java.util.HashSet<>(); boolean[] canRemoveAliases = new boolean[] { true }; @@ -654,14 +658,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy referencesBuilder.addAll(enrichRefs); } else if (p instanceof LookupJoin join) { if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) { - keepJoinRefsBuilder.addAll(usingJoinType.columns()); + joinRefs.addAll(usingJoinType.columns()); } - if (shadowingRefsBuilder.isEmpty()) { + if (keepRefs.isEmpty()) { // No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern()); } else { // Keep commands can reference the join columns with names that shadow aliases, so we block their removal - keepJoinRefsBuilder.addAll(shadowingRefsBuilder); + joinRefs.addAll(keepRefs); } } else { referencesBuilder.addAll(p.references()); @@ -673,10 +677,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy p.forEachExpression(UnresolvedNamePattern.class, up -> { var ua = new UnresolvedAttribute(up.source(), up.name()); referencesBuilder.add(ua); - shadowingRefsBuilder.add(ua); + if (p instanceof Keep) { + keepRefs.add(ua); + } else if (p instanceof Drop) { + dropWildcardRefs.add(ua); + } else { + throw new IllegalStateException("Only KEEP and DROP should allow wildcards"); + } }); if (p instanceof Keep) { - shadowingRefsBuilder.addAll(p.references()); + keepRefs.addAll(p.references()); } } @@ -710,13 +720,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy if (fieldNames.contains(ne.name())) { return; } - referencesBuilder.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefsBuilder.contains(attr))); + referencesBuilder.removeIf( + attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr)) + ); }); } }); // Add JOIN ON column references afterward to avoid Alias removal - referencesBuilder.addAll(keepJoinRefsBuilder); + referencesBuilder.addAll(joinRefs); // If any JOIN commands need wildcard field-caps calls, persist the index names if (wildcardJoinIndices.isEmpty() == false) { result = result.withWildcardJoinIndices(wildcardJoinIndices); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 4264fdacc5d7b..ae66488f419e1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -1969,7 +1969,7 @@ public void testDropAgainWithWildcardAfterEval() { """, Set.of("emp_no", "emp_no.*", "*name", "*name.*")); } - public void testDropWildcardedFields_AfterRename() { + public void testDropWildcardFieldsAfterRename() { assertFieldNames( """ from employees @@ -1982,7 +1982,30 @@ public void testDropWildcardedFields_AfterRename() { ); } - public void testDropWildcardFields_WithLookupJoin() { + public void testDropWildcardFieldsAfterLookupJoins() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames(""" + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", Set.of("*"), Set.of()); + } + + public void testDropWildcardFieldsAfterLookupJoins2() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames(""" + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | DROP *e, client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", Set.of("*"), Set.of()); + } + + public void testDropWildcardFieldsAfterLookupJoinsAndKeep() { assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); assertFieldNames( """ @@ -1998,6 +2021,55 @@ public void testDropWildcardFields_WithLookupJoin() { ); } + public void testDropWildcardFieldsAfterLookupJoinKeepLookupJoin() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("message_types_lookup") + ); + } + + public void testDropWildcardFieldsAfterKeepAndLookupJoins() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("clientips_lookup", "message_types_lookup") + ); + } + + public void testDropWildcardFieldsAfterKeepAndLookupJoins2() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN clientips_lookup ON client_ip + | DROP *e + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e, client_ip""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("clientips_lookup", "message_types_lookup") + ); + } + public void testForkFieldsWithKeepAfterFork() { assertFieldNames(""" FROM test