From 8cbfa7f82e23cd44bde35b91e7b7562902f9692a Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 7 Jul 2025 09:11:56 +0200 Subject: [PATCH 1/2] ESQL: Fix wildcard DROP after LOOKUP JOIN (#130448) In EsqlSession#fieldNames, keep track of wildcard patterns in DROP separately from the references and wildcard patterns in KEEP commands. Only the latter are relevant to determine if we need to ask for all fields for the lookup index in the field caps request. (cherry picked from commit 05fc6f2089032348172e90ecaa3a5b78eaa44836) # Conflicts: # x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java # x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java --- docs/changelog/130448.yaml | 6 ++ .../src/main/resources/lookup-join.csv-spec | 34 +++++++++ .../xpack/esql/action/EsqlCapabilities.java | 7 ++ .../xpack/esql/session/EsqlSession.java | 40 ++++++---- .../session/IndexResolverFieldNamesTests.java | 76 ++++++++++++++++++- 5 files changed, 147 insertions(+), 16 deletions(-) create mode 100644 docs/changelog/130448.yaml diff --git a/docs/changelog/130448.yaml b/docs/changelog/130448.yaml new file mode 100644 index 0000000000000..d99ef37b0e071 --- /dev/null +++ b/docs/changelog/130448.yaml @@ -0,0 +1,6 @@ +pr: 130448 +summary: Fix wildcard drop after lookup join +area: ES|QL +type: bug +issues: + - 129561 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index dba2f653b1d01..cccd7187a15eb 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -1682,3 +1682,37 @@ max:long 3450233 8268153 ; + +wildcardDropAfterLookupJoin +required_capability: join_lookup_v12 +required_capability: drop_with_wildcard_after_lookup_join + +ROW somefield = 0, type = "Production" +| KEEP somefield, type +| LOOKUP JOIN message_types_lookup ON type +| DROP *field +; + +type:keyword | message:keyword +Production | Production environment +; + + +wildcardDropAfterLookupJoinTwice +required_capability: join_lookup_v12 +required_capability: drop_with_wildcard_after_lookup_join + +ROW somefield = 0, type = "Production" +| KEEP somefield, type +| EVAL otherfield = 123, language_code = 3 +| LOOKUP JOIN message_types_lookup ON type +| DROP *field +| EVAL foofield = 123 +| KEEP * +| LOOKUP JOIN languages_lookup ON language_code +| DROP *ge, *field +; + +type:keyword | language_code:integer | language_name:keyword +Production | 3 | Spanish +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index bfbea6fddf0da..b9503e2bbb2d3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -742,6 +742,13 @@ public enum Cap { */ DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL, + /** + * Correctly ask for all fields from lookup indices even when there is e.g. a {@code DROP *field} after. + * See + * ES|QL: missing columns for wildcard drop after lookup join #129561 + */ + DROP_WITH_WILDCARD_AFTER_LOOKUP_JOIN, + /** * During resolution (pre-analysis) we have to consider that joins can override regex extracted values * see ES|QL: pruning of JOINs leads to missing fields #127467 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 0dffdbc14b838..786c1953f328a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -578,16 +578,20 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy } AttributeSet references = new AttributeSet(); - // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some - // attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields - // to remove if their name matches one of these wildcards. + // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can cover some + // attributes ("lookup join" generated columns among others); steps like removal of Aliases should ignore fields matching the + // wildcards. // - // ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for - // "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for + // E.g. "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for + // "from test | eval first_name = 1 | drop first_name | drop *name" should also consider "*name" as valid field to ask for // // NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder - var shadowingRefs = new AttributeSet(); - var keepJoinRefs = new AttributeSet(); + // Except in KEEP and DROP. + var keepRefs = new AttributeSet(); + var dropWildcardRefs = new AttributeSet(); + // fields required to request for lookup joins to work + var joinRefs = new AttributeSet(); + // lookup indices where we request "*" because we may require all their fields Set wildcardJoinIndices = new java.util.HashSet<>(); boolean[] canRemoveAliases = new boolean[] { true }; @@ -605,14 +609,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy references.addAll(enrichRefs); } else if (p instanceof LookupJoin join) { if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) { - keepJoinRefs.addAll(usingJoinType.columns()); + joinRefs.addAll(usingJoinType.columns()); } - if (shadowingRefs.isEmpty()) { + if (keepRefs.isEmpty()) { // No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern()); } else { // Keep commands can reference the join columns with names that shadow aliases, so we block their removal - keepJoinRefs.addAll(shadowingRefs); + joinRefs.addAll(keepRefs); } } else { references.addAll(p.references()); @@ -624,10 +628,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy p.forEachExpression(UnresolvedNamePattern.class, up -> { var ua = new UnresolvedAttribute(up.source(), up.name()); references.add(ua); - shadowingRefs.add(ua); + if (p instanceof Keep) { + keepRefs.add(ua); + } else if (p instanceof Drop) { + dropWildcardRefs.add(ua); + } else { + throw new IllegalStateException("Only KEEP and DROP should allow wildcards"); + } }); if (p instanceof Keep) { - shadowingRefs.addAll(p.references()); + keepRefs.addAll(p.references()); } } @@ -661,13 +671,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy if (fieldNames.contains(ne.name())) { return; } - references.removeIf(attr -> matchByName(attr, ne.name(), shadowingRefs.contains(attr))); + references.removeIf( + attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr)) + ); }); } }); // Add JOIN ON column references afterward to avoid Alias removal - references.addAll(keepJoinRefs); + references.addAll(joinRefs); // If any JOIN commands need wildcard field-caps calls, persist the index names if (wildcardJoinIndices.isEmpty() == false) { result = result.withWildcardJoinIndices(wildcardJoinIndices); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 17f9c48bebb8b..a10bc7bd73116 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -1758,7 +1758,7 @@ public void testDropAgainWithWildcardAfterEval() { """, Set.of("emp_no", "emp_no.*", "*name", "*name.*")); } - public void testDropWildcardedFields_AfterRename() { + public void testDropWildcardFieldsAfterRename() { assertFieldNames( """ from employees @@ -1771,7 +1771,30 @@ public void testDropWildcardedFields_AfterRename() { ); } - public void testDropWildcardFields_WithLookupJoin() { + public void testDropWildcardFieldsAfterLookupJoins() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames(""" + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", Set.of("*"), Set.of()); + } + + public void testDropWildcardFieldsAfterLookupJoins2() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames(""" + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | DROP *e, client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", Set.of("*"), Set.of()); + } + + public void testDropWildcardFieldsAfterLookupJoinsAndKeep() { assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); assertFieldNames( """ @@ -1787,6 +1810,55 @@ public void testDropWildcardFields_WithLookupJoin() { ); } + public void testDropWildcardFieldsAfterLookupJoinKeepLookupJoin() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("message_types_lookup") + ); + } + + public void testDropWildcardFieldsAfterKeepAndLookupJoins() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("clientips_lookup", "message_types_lookup") + ); + } + + public void testDropWildcardFieldsAfterKeepAndLookupJoins2() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | KEEP @timestamp, *e*, client_ip + | LOOKUP JOIN clientips_lookup ON client_ip + | DROP *e + | LOOKUP JOIN message_types_lookup ON message + | SORT @timestamp + | DROP *e, client_ip""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of("clientips_lookup", "message_types_lookup") + ); + } + private Set fieldNames(String query, Set enrichPolicyMatchFields) { var preAnalysisResult = new EsqlSession.PreAnalysisResult(null); return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames(); From e3cd83b54b5077df85de622941b94c54b8aa7e5e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 7 Jul 2025 09:22:23 +0000 Subject: [PATCH 2/2] [CI] Auto commit changes from spotless --- .../org/elasticsearch/xpack/esql/session/EsqlSession.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 786c1953f328a..cd4e6599b5d78 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -671,9 +671,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy if (fieldNames.contains(ne.name())) { return; } - references.removeIf( - attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr)) - ); + references.removeIf(attr -> matchByName(attr, ne.name(), keepRefs.contains(attr) || dropWildcardRefs.contains(attr))); }); } });