diff --git a/docs/changelog/127563.yaml b/docs/changelog/127563.yaml new file mode 100644 index 0000000000000..1e88e9d487b25 --- /dev/null +++ b/docs/changelog/127563.yaml @@ -0,0 +1,6 @@ +pr: 127563 +summary: "ESQL: Avoid unintended attribute removal" +area: ES|QL +type: bug +issues: + - 127468 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec index 2b3b0bee93471..f8704a20af8c3 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec @@ -331,3 +331,43 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}"; a:keyword | b:keyword | d:keyword b c d x | b | d ; + +avoidAttributesRemoval +// https://github.com/elastic/elasticsearch/issues/127468 +required_capability: keep_regex_extract_attributes +required_capability: join_lookup_v12 +from message_types +| eval type = 1 +| lookup join message_types_lookup on message +| drop message +| dissect type "%{b}" +| stats x = max(b) +| keep x +; + +x:keyword +Success +; + +avoidAttributesRemoval2 +// https://github.com/elastic/elasticsearch/issues/127468 +required_capability: keep_regex_extract_attributes +required_capability: join_lookup_v12 +FROM sample_data, employees +| EVAL client_ip = client_ip::keyword +| RENAME languages AS language_code +| LOOKUP JOIN clientips_lookup ON client_ip +| EVAL type = 1::keyword +| EVAL type = 2 +| LOOKUP JOIN message_types_lookup ON message +| LOOKUP JOIN languages_lookup ON language_code +| DISSECT type "%{type_as_text}" +| KEEP message +| WHERE message IS NOT NULL +| SORT message DESC +| LIMIT 1 +; + +message:keyword +Disconnected +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index 6dc9148ffc0e8..320861786a39d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -297,3 +297,35 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W text:integer | int:integer | description:keyword 123 | 5 | abc ; + +avoidAttributesRemoval +// https://github.com/elastic/elasticsearch/issues/127468 +required_capability: union_types +required_capability: join_lookup_v12 +required_capability: keep_regex_extract_attributes +from multivalue_points,h*,messa* +| eval `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false +| eval PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc" +| rename message as message +| lookup join message_types_lookup on message +| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST +| limit 5845 +| drop `subset`, ip*, `card`, `within`, description, `aecuvjTkgZza`, `ip0`, height_range, DsxMWtGL, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card +| grok type "%{WORD:GknCxQFo}" +| eval `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617 +| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk` +| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}" +| drop HvTQdOqFajpH, `location`, centroid +| mv_expand GknCxQFo +| limit 410 +| limit 3815 +| rename `id` AS `GknCxQFo` +| grok host_group "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}" +| stats BkQXJRMeAM = min(GknCxQFo) +| keep `BkQXJRMeAM` +; + +BkQXJRMeAM:long +4325287503714500302 +; + diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 3a612f556f5d3..fe9da1a85cff4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -860,7 +860,14 @@ public enum Cap { * During resolution (pre-analysis) we have to consider that joins can override regex extracted values * see ES|QL: pruning of JOINs leads to missing fields #127467 */ - FIX_JOIN_MASKING_REGEX_EXTRACT; + FIX_JOIN_MASKING_REGEX_EXTRACT, + + /** + * Avid GROK and DISSECT attributes being removed when resolving fields. + * see ES|QL: Grok only supports KEYWORD or TEXT values, + * found expression [type] type [INTEGER] #127468 + */ + KEEP_REGEX_EXTRACT_ATTRIBUTES; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index b29ad8788bab8..72c792d6f034b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -620,7 +620,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set enrichPolicy // // and ips_policy enriches the results with the same name ip field), // these aliases should be kept in the list of fields. - if (canRemoveAliases[0] && couldOverrideAliases(p)) { + if (canRemoveAliases[0] && p.anyMatch(EsqlSession::couldOverrideAliases)) { canRemoveAliases[0] = false; } if (canRemoveAliases[0]) { @@ -687,7 +687,8 @@ private static boolean couldOverrideAliases(LogicalPlan p) { || p instanceof Project || p instanceof RegexExtract || p instanceof Rename - || p instanceof TopN) == false; + || p instanceof TopN + || p instanceof UnresolvedRelation) == false; } private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 55ed3b0940a2c..17f9c48bebb8b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -604,7 +604,20 @@ public void testEnrichEval() { | eval language = concat(x, "-", lang) | keep emp_no, x, lang, language | sort emp_no desc | limit 3""", - Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*", "lang", "lang.*") + Set.of( + "emp_no", + "x", + "lang", + "language", + "language_name", + "languages", + "x.*", + "language_name.*", + "languages.*", + "emp_no.*", + "lang.*", + "language.*" + ) ); } @@ -1355,7 +1368,7 @@ public void testAvoidGrokAttributesRemoval() { | grok type "%{WORD:b}" | stats x = max(b) | keep x""", Set.of()); - assertThat(fieldNames, equalTo(Set.of("message", "x", "x.*", "message.*"))); + assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*"))); } public void testAvoidGrokAttributesRemoval2() { @@ -1388,6 +1401,62 @@ public void testAvoidGrokAttributesRemoval3() { } + /** + * @see ES|QL: Grok only supports KEYWORD or TEXT values, + * found expression [type] type [INTEGER] + */ + public void testAvoidGrokAttributesRemoval4() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + Set fieldNames = fieldNames(""" + from message_types + | eval type = 1 + | lookup join message_types_lookup on message + | drop message + | grok type "%{WORD:b}" + | stats x = max(b) + | keep x""", Set.of()); + assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*"))); + } + + /** + * @see ES|QL: Grok only supports KEYWORD or TEXT values, + * found expression [type] type [INTEGER] + */ + public void testAvoidGrokAttributesRemoval5() { + assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled()); + Set fieldNames = fieldNames(""" + FROM sample_data, employees + | EVAL client_ip = client_ip::keyword + | RENAME languages AS language_code + | LOOKUP JOIN clientips_lookup ON client_ip + | EVAL type = 1::keyword + | EVAL type = 2 + | LOOKUP JOIN message_types_lookup ON message + | LOOKUP JOIN languages_lookup ON language_code + | DISSECT type "%{type_as_text}" + | KEEP message + | WHERE message IS NOT NULL + | SORT message DESC + | LIMIT 1""", Set.of()); + assertThat( + fieldNames, + equalTo( + Set.of( + "message", + "type", + "languages", + "client_ip", + "language_code", + "language_code.*", + "client_ip.*", + "message.*", + "type.*", + "languages.*" + ) + ) + ); + } + public void testEnrichOnDefaultField() { Set fieldNames = fieldNames(""" from employees