diff --git a/docs/changelog/127009.yaml b/docs/changelog/127009.yaml new file mode 100644 index 0000000000000..adbec139cae28 --- /dev/null +++ b/docs/changelog/127009.yaml @@ -0,0 +1,6 @@ +pr: 127009 +summary: "ESQL: Keep `DROP` attributes when resolving field names" +area: ES|QL +type: bug +issues: + - 126418 diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java index dabbc2b7f4bdd..e0826ca710eb7 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java @@ -51,7 +51,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase { "Unknown column \\[\\]", // https://github.com/elastic/elasticsearch/issues/121741, "Plan \\[ProjectExec\\[\\[.* optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/125866 "optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781 - "No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418 "Unknown column", // https://github.com/elastic/elasticsearch/issues/127467 "only supports KEYWORD or TEXT values", // https://github.com/elastic/elasticsearch/issues/127468 "The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec index 9886d6cce0ca2..eec8e073e3eec 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec @@ -172,3 +172,19 @@ Milky Way Milky Way Milky Way ; + +dropAgainWithWildcardAfterEval +required_capability: drop_again_with_wildcard_after_eval +from languages +| eval language_code = 12, x = 13 +| drop language_code +| drop language* +| keep x +| limit 3 +; + +x:integer +13 +13 +13 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index b9c65707146ff..9aaf5552c7aed 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -1650,3 +1650,26 @@ event_duration:long 2764889 3450233 ; + +dropAgainWithWildcardAfterEval2 +required_capability: join_lookup_v12 +required_capability: drop_again_with_wildcard_after_eval +from addresses,cartesian_multipolygons,hosts +| rename city.name as message +| lookup join message_types_lookup on message +| eval card = -6013949614291505456, hOntTwnVC = null, PQAF = null, DXkxCFXyw = null, number = -7336429038807752405 +| eval dewAwHC = -1186293612, message = null +| sort number ASC, street ASC, ip0 DESC, name ASC NULLS FIRST, host ASC +| drop number, host_group, *umber, `city.country.continent.name`, dewAwHC, `zip_code`, `message`, city.country.continent.planet.name, `name`, `ip1`, message, zip_code +| drop description, *e, id +| keep `hOntTwnVC`, city.country.continent.planet.galaxy, street +| limit 5 +; + +hOntTwnVC:null | city.country.continent.planet.galaxy:keyword | street:keyword +null | Milky Way | Kearny St +null | Milky Way | Keizersgracht +null | Milky Way | Marunouchi +null | null | null +null | null | null +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index defa70737cef8..86649e7d4a9eb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1036,6 +1036,12 @@ public enum Cap { */ FIX_JOIN_MASKING_EVAL, + /** + * Support for keeping `DROP` attributes when resolving field names. + * see ES|QL: no matches for pattern #126418 + */ + DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL, + /** * Support last_over_time aggregation that gets evaluated per time-series */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 7312ddb7da790..91c8547dc1bfb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -590,9 +590,15 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy } var referencesBuilder = AttributeSet.builder(); - // "keep" attributes are special whenever a wildcard is used in their name + // "keep" and "drop" attributes are special whenever a wildcard is used in their name, as the wildcard can shadow some + // attributes ("lookup join" generated columns among others) and steps like removal of Aliases should ignore the fields + // to remove if their name matches one of these wildcards. + // // ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for - var keepCommandRefsBuilder = AttributeSet.builder(); + // "from test | eval first_name = 1 | drop first_name | drop *name should also consider "*name" as valid field to ask for + // + // NOTE: the grammar allows wildcards to be used in other commands as well, but these are forbidden in the LogicalPlanBuilder + var shadowingRefsBuilder = AttributeSet.builder(); var keepJoinRefsBuilder = AttributeSet.builder(); Set wildcardJoinIndices = new java.util.HashSet<>(); @@ -617,12 +623,12 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy if (join.config().type() instanceof JoinTypes.UsingJoinType usingJoinType) { keepJoinRefsBuilder.addAll(usingJoinType.columns()); } - if (keepCommandRefsBuilder.isEmpty()) { + if (shadowingRefsBuilder.isEmpty()) { // No KEEP commands after the JOIN, so we need to mark this index for "*" field resolution wildcardJoinIndices.add(((UnresolvedRelation) join.right()).indexPattern().indexPattern()); } else { // Keep commands can reference the join columns with names that shadow aliases, so we block their removal - keepJoinRefsBuilder.addAll(keepCommandRefsBuilder); + keepJoinRefsBuilder.addAll(shadowingRefsBuilder); } } else { referencesBuilder.addAll(p.references()); @@ -634,12 +640,10 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy p.forEachExpression(UnresolvedNamePattern.class, up -> { var ua = new UnresolvedAttribute(up.source(), up.name()); referencesBuilder.add(ua); - if (p instanceof Keep) { - keepCommandRefsBuilder.add(ua); - } + shadowingRefsBuilder.add(ua); }); if (p instanceof Keep) { - keepCommandRefsBuilder.addAll(p.references()); + shadowingRefsBuilder.addAll(p.references()); } } @@ -669,7 +673,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy if (fieldNames.contains(alias.name())) { return; } - referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr))); + referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), shadowingRefsBuilder.contains(attr))); }); } }); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index ef54dec2a0b60..539036d7d1cf0 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -1710,6 +1710,45 @@ public void testEnrichAndJoinMaskingEvalWh() { | keep emp_no, language_name""", Set.of("emp_no", "language_name", "languages", "language_name.*", "languages.*", "emp_no.*")); } + public void testDropAgainWithWildcardAfterEval() { + assertFieldNames(""" + from employees + | eval full_name = 12 + | drop full_name + | drop *name + | keep emp_no + """, Set.of("emp_no", "emp_no.*", "*name", "*name.*")); + } + + public void testDropWildcardedFields_AfterRename() { + assertFieldNames( + """ + from employees + | rename first_name AS first_names, last_name AS last_names + | eval first_names = 1 + | drop first_names + | drop *_names + | keep gender""", + Set.of("first_name", "first_name.*", "last_name", "last_name.*", "*_names", "*_names.*", "gender", "gender.*") + ); + } + + public void testDropWildcardFields_WithLookupJoin() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + assertFieldNames( + """ + FROM sample_data + | EVAL client_ip = client_ip::keyword + | LOOKUP JOIN clientips_lookup ON client_ip + | LOOKUP JOIN message_types_lookup ON message + | KEEP @timestamp, message, *e* + | SORT @timestamp + | DROP *e""", + Set.of("client_ip", "client_ip.*", "message", "message.*", "@timestamp", "@timestamp.*", "*e*", "*e", "*e.*"), + Set.of() + ); + } + private Set fieldNames(String query, Set enrichPolicyMatchFields) { var preAnalysisResult = new EsqlSession.PreAnalysisResult(null); return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();