From d20aa58c220a93beaae4822400caaa7d312c1b2a Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Mon, 19 May 2025 20:28:42 +0300 Subject: [PATCH 1/2] ESQL: Keep `DROP` attributes when resolving field names (#127009) (#128147) (cherry picked from commit 54f26680eae655052947b6b9d748c33cde745881) Co-authored-by: kanoshiou <73424326+kanoshiou@users.noreply.github.com> --- docs/changelog/127009.yaml | 6 +++ .../rest/generative/GenerativeRestTest.java | 1 - .../src/main/resources/drop.csv-spec | 16 ++++++++ .../src/main/resources/lookup-join.csv-spec | 23 +++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 8 +++- .../xpack/esql/session/EsqlSession.java | 28 +++++++------ .../session/IndexResolverFieldNamesTests.java | 39 +++++++++++++++++++ 7 files changed, 107 insertions(+), 14 deletions(-) create mode 100644 docs/changelog/127009.yaml diff --git a/docs/changelog/127009.yaml b/docs/changelog/127009.yaml new file mode 100644 index 0000000000000..adbec139cae28 --- /dev/null +++ b/docs/changelog/127009.yaml @@ -0,0 +1,6 @@ +pr: 127009 +summary: "ESQL: Keep `DROP` attributes when resolving field names" +area: ES|QL +type: bug +issues: + - 126418 diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java index a123c1b8356a0..e41596bb300f0 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java @@ -50,7 +50,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase { "token recognition error at: '``", // https://github.com/elastic/elasticsearch/issues/125870 "Unknown column \\[.*\\]", // https://github.com/elastic/elasticsearch/issues/126026 "optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781 - "No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418 "The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework ); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec index 9886d6cce0ca2..eec8e073e3eec 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec @@ -172,3 +172,19 @@ Milky Way Milky Way Milky Way ; + +dropAgainWithWildcardAfterEval +required_capability: drop_again_with_wildcard_after_eval +from languages +| eval language_code = 12, x = 13 +| drop language_code +| drop language* +| keep x +| limit 3 +; + +x:integer +13 +13 +13 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 78d7c4e1bdaa4..1fe767e914f8d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -1567,6 +1567,29 @@ Spanish German ; +dropAgainWithWildcardAfterEval2 +required_capability: join_lookup_v12 +required_capability: drop_again_with_wildcard_after_eval +from addresses,cartesian_multipolygons,hosts +| rename city.name as message +| lookup join message_types_lookup on message +| eval card = -6013949614291505456, hOntTwnVC = null, PQAF = null, DXkxCFXyw = null, number = -7336429038807752405 +| eval dewAwHC = -1186293612, message = null +| sort number ASC, street ASC, ip0 DESC, name ASC NULLS FIRST, host ASC +| drop number, host_group, *umber, `city.country.continent.name`, dewAwHC, `zip_code`, `message`, city.country.continent.planet.name, `name`, `ip1`, message, zip_code +| drop description, *e, id +| keep `hOntTwnVC`, city.country.continent.planet.galaxy, street +| limit 5 +; + +hOntTwnVC:null | city.country.continent.planet.galaxy:keyword | street:keyword +null | Milky Way | Kearny St +null | Milky Way | Keizersgracht +null | Milky Way | Marunouchi +null | null | null +null | null | null +; + ############################################### # LOOKUP JOIN on date_nanos field ############################################### diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index abe66e7308225..c53736c8a3383 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -734,7 +734,13 @@ public enum Cap { * During resolution (pre-analysis) we have to consider that joins or enriches can override EVALuated values * https://github.com/elastic/elasticsearch/issues/126419 */ - FIX_JOIN_MASKING_EVAL; + FIX_JOIN_MASKING_EVAL, + + /** + * Support for keeping `DROP` attributes when resolving field names. + * see ES|QL: no matches for pattern #126418 + */ + DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 18cdd5dc50f2b..799edeec09db7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -576,10 +576,16 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy } AttributeSet references = new AttributeSet(); - // "keep" attributes are special whenever a wildcard is used in their name + // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some + // attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields + // to remove if their name matches one of these wildcards. + // // ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for - AttributeSet keepCommandReferences = new AttributeSet(); - AttributeSet keepJoinReferences = new AttributeSet(); + // "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for + // + // NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder + var shadowingRefs = new AttributeSet(); + var keepJoinRefs = new AttributeSet(); Set wildcardJoinIndices = new java.util.HashSet<>(); boolean[] canRemoveAliases = new boolean[] { true }; @@ -601,14 +607,14 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy references.addAll(enrichRefs); } else if (p instanceof LookupJoin join) { if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) { - keepJoinReferences.addAll(usingJoinType.columns()); + keepJoinRefs.addAll(usingJoinType.columns()); } - if (keepCommandReferences.isEmpty()) { + if (shadowingRefs.isEmpty()) { // No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern()); } else { // Keep commands can reference the join columns with names that shadow aliases, so we block their removal - keepJoinReferences.addAll(keepCommandReferences); + keepJoinRefs.addAll(shadowingRefs); } } else { references.addAll(p.references()); @@ -620,12 +626,10 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy p.forEachExpression(UnresolvedNamePattern.class, up -> { var ua = new UnresolvedAttribute(up.source(), up.name()); references.add(ua); - if (p instanceof Keep) { - keepCommandReferences.add(ua); - } + shadowingRefs.add(ua); }); if (p instanceof Keep) { - keepCommandReferences.addAll(p.references()); + shadowingRefs.addAll(p.references()); } } @@ -655,13 +659,13 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy if (fieldNames.contains(alias.name())) { return; } - references.removeIf(attr -> matchByName(attr, alias.name(), keepCommandReferences.contains(attr))); + references.removeIf(attr -> matchByName(attr, alias.name(), shadowingRefs.contains(attr))); }); } }); // Add JOIN ON column references afterward to avoid Alias removal - references.addAll(keepJoinReferences); + references.addAll(keepJoinRefs); // If any JOIN commands need wildcard field-caps calls, persist the index names if (wildcardJoinIndices.isEmpty() == false) { result = result.withWildcardJoinIndices(wildcardJoinIndices); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 7f0330101c8f6..e306661420d93 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -1632,6 +1632,45 @@ public void testEnrichAndJoinMaskingEvalWh() { | keep emp_no, language_name""", Set.of("emp_no", "language_name", "languages", "language_name.*", "languages.*", "emp_no.*")); } + public void testDropAgainWithWildcardAfterEval() { + assertFieldNames(""" + from employees + | eval full_name = 12 + | drop full_name + | drop *name + | keep emp_no + """, Set.of("emp_no", "emp_no.*", "*name", "*name.*")); + } + + public void testDropWildcardedFields_AfterRename() { + assertFieldNames( + """ + from employees + | rename first_name AS first_names, last_name AS last_names + | eval first_names = 1 + | drop first_names + | drop *_names + | keep gender""", + Set.of("first_name", "first_name.*", "last_name", "last_name.*", "*_names", "*_names.*", "gender", "gender.*") + ); + } + + public void testDropWildcardFields_WithLookupJoin() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | KEEP @timestamp, message, *e* + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of() + ); + } + private Set fieldNames(String query, Set enrichPolicyMatchFields) { var preAnalysisResult = new EsqlSession.PreAnalysisResult(null); return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames(); From d52861524a10d39b6cd73efc8912e5d364c0d678 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 2 Jul 2025 13:26:31 +0000 Subject: [PATCH 2/2] [CI] Auto commit changes from spotless --- .../org/elasticsearch/xpack/esql/action/EsqlCapabilities.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index c53736c8a3383..bd46726eddf75 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -735,7 +735,7 @@ public enum Cap { * https://github.com/elastic/elasticsearch/issues/126419 */ FIX_JOIN_MASKING_EVAL, - + /** * Support for keeping `DROP` attributes when resolving field names. * see ES|QL: no matches for pattern #126418