From 989bc5eb7210f04e7b2f71a7f610fe00d1f9087b Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Wed, 21 May 2025 18:13:29 +0300 Subject: [PATCH] [8.19] ESQL: Avoid unintended attribute removal (#127563) (#128231) * ESQL: Avoid unintended attribute removal (#127563) --- docs/changelog/127563.yaml | 6 ++ .../src/main/resources/dissect.csv-spec | 40 +++++++++++ .../src/main/resources/grok.csv-spec | 32 +++++++++ .../xpack/esql/action/EsqlCapabilities.java | 9 ++- .../xpack/esql/session/EsqlSession.java | 5 +- .../session/IndexResolverFieldNamesTests.java | 71 ++++++++++++++++++- 6 files changed, 159 insertions(+), 4 deletions(-) create mode 100644 docs/changelog/127563.yaml diff --git a/docs/changelog/127563.yaml b/docs/changelog/127563.yaml new file mode 100644 index 0000000000000..1e88e9d487b25 --- /dev/null +++ b/docs/changelog/127563.yaml @@ -0,0 +1,6 @@ +pr: 127563 +summary: "ESQL: Avoid unintended attribute removal" +area: ES|QL +type: bug +issues: + - 127468 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec index 2b3b0bee93471..f8704a20af8c3 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec @@ -331,3 +331,43 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}"; a:keyword | b:keyword | d:keyword b c d x | b | d ; + +avoidAttributesRemoval +// https://github.com/elastic/elasticsearch/issues/127468 +required_capability: keep_regex_extract_attributes +required_capability: join_lookup_v12 +from message_types +| eval type = 1 +| lookup join message_types_lookup on message +| drop message +| dissect type "%{b}" +| stats x = max(b) +| keep x +; + +x:keyword +Success +; + +avoidAttributesRemoval2 +// https://github.com/elastic/elasticsearch/issues/127468 +required_capability: keep_regex_extract_attributes +required_capability: join_lookup_v12 +FROM sample_data, employees +| EVAL client_ip = client_ip::keyword +| RENAME languages AS language_code +| LOOKUP JOIN clientips_lookup ON client_ip +| EVAL type = 1::keyword +| EVAL type = 2 +| LOOKUP JOIN message_types_lookup ON message +| LOOKUP JOIN languages_lookup ON language_code +| DISSECT type "%{type_as_text}" +| KEEP message +| WHERE message IS NOT NULL +| SORT message DESC +| LIMIT 1 +; + +message:keyword +Disconnected +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index 6dc9148ffc0e8..320861786a39d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -297,3 +297,35 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W text:integer | int:integer | description:keyword 123 | 5 | abc ; + +avoidAttributesRemoval +// https://github.com/elastic/elasticsearch/issues/127468 +required_capability: union_types +required_capability: join_lookup_v12 +required_capability: keep_regex_extract_attributes +from multivalue_points,h*,messa* +| eval `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false +| eval PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc" +| rename message as message +| lookup join message_types_lookup on message +| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST +| limit 5845 +| drop `subset`, ip*, `card`, `within`, description, `aecuvjTkgZza`, `ip0`, height_range, DsxMWtGL, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card +| grok type "%{WORD:GknCxQFo}" +| eval `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617 +| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk` +| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}" +| drop HvTQdOqFajpH, `location`, centroid +| mv_expand GknCxQFo +| limit 410 +| limit 3815 +| rename `id` AS `GknCxQFo` +| grok host_group "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}" +| stats BkQXJRMeAM = min(GknCxQFo) +| keep `BkQXJRMeAM` +; + +BkQXJRMeAM:long +4325287503714500302 +; + diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index bd46726eddf75..13fc7ccb6472b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -740,7 +740,14 @@ public enum Cap { * Support for keeping `DROP` attributes when resolving field names. * see ES|QL: no matches for pattern #126418 */ - DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL; + DROP_AGAIN_WITH_WILDCARD_AFTER_EVAL, + + /** + * Avid GROK and DISSECT attributes being removed when resolving fields. + * see ES|QL: Grok only supports KEYWORD or TEXT values, + * found expression [type] type [INTEGER] #127468 + */ + KEEP_REGEX_EXTRACT_ATTRIBUTES; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 799edeec09db7..e17eca2123116 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -644,7 +644,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy // // and ips_policy enriches the results with the same name ip field), // these aliases should be kept in the list of fields. - if (canRemoveAliases[0] && couldOverrideAliases(p)) { + if (canRemoveAliases[0] && p.anyMatch(EsqlSession::couldOverrideAliases)) { canRemoveAliases[0] = false; } if (canRemoveAliases[0]) { @@ -707,7 +707,8 @@ private static boolean couldOverrideAliases(LogicalPlan p) { || p instanceof Project || p instanceof RegexExtract || p instanceof Rename - || p instanceof TopN) == false; + || p instanceof TopN + || p instanceof UnresolvedRelation) == false; } private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index e306661420d93..e24a5510bab28 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -604,7 +604,20 @@ public void testEnrichEval() { | eval language = concat(x, "-", lang) | keep emp_no, x, lang, language | sort emp_no desc | limit 3""", - Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*", "lang", "lang.*") + Set.of( + "emp_no", + "x", + "lang", + "language", + "language_name", + "languages", + "x.*", + "language_name.*", + "languages.*", + "emp_no.*", + "lang.*", + "language.*" + ) ); } @@ -1341,6 +1354,62 @@ public void testDissectOverwriteName() { assertThat(fieldNames, equalTo(Set.of("emp_no", "emp_no.*", "first_name", "first_name.*"))); } + /** + * @see ES|QL: Grok only supports KEYWORD or TEXT values, + * found expression [type] type [INTEGER] + */ + public void testAvoidGrokAttributesRemoval4() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + Set fieldNames = fieldNames(""" + from message_types + | eval type = 1 + | lookup join message_types_lookup on message + | drop message + | grok type "%{WORD:b}" + | stats x = max(b) + | keep x""", Set.of()); + assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*"))); + } + + /** + * @see ES|QL: Grok only supports KEYWORD or TEXT values, + * found expression [type] type [INTEGER] + */ + public void testAvoidGrokAttributesRemoval5() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + Set fieldNames = fieldNames(""" + FROM sample_data, employees + | EVAL client_ip = client_ip::keyword + | RENAME languages AS language_code + | LOOKUP JOIN clientips_lookup ON client_ip + | EVAL type = 1::keyword + | EVAL type = 2 + | LOOKUP JOIN message_types_lookup ON message + | LOOKUP JOIN languages_lookup ON language_code + | DISSECT type "%{type_as_text}" + | KEEP message + | WHERE message IS NOT NULL + | SORT message DESC + | LIMIT 1""", Set.of()); + assertThat( + fieldNames, + equalTo( + Set.of( + "message", + "type", + "languages", + "client_ip", + "language_code", + "language_code.*", + "client_ip.*", + "message.*", + "type.*", + "languages.*" + ) + ) + ); + } + public void testEnrichOnDefaultField() { Set fieldNames = fieldNames(""" from employees