elastic · astefan · May 21, 2025 · Apr 30, 2025 · Apr 30, 2025 · Apr 30, 2025
diff --git a/docs/changelog/127563.yaml b/docs/changelog/127563.yaml
@@ -0,0 +1,6 @@
+pr: 127563
+summary: "ESQL: Avoid regex extract attributes removal"
+area: ES|QL
+type: bug
+issues:
+  - 127468
diff --git a/...ver/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java b/...ver/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java
@@ -53,7 +53,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
         "optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
         "No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418
         "Unknown column", // https://github.com/elastic/elasticsearch/issues/127467
-        "only supports KEYWORD or TEXT values", // https://github.com/elastic/elasticsearch/issues/127468
         "The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
     );
 

diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec
@@ -331,3 +331,19 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}";
 a:keyword  | b:keyword | d:keyword
 b c d x    | b         | d
 ;
+
+avoidAttributesRemoval
+required_capability: keep_regex_extract_attributes
+required_capability: join_lookup_v12
+from message_types 
+| eval type = 1 
+| lookup join message_types_lookup on message 
+| drop message 
+| dissect type "%{b}" 
+| stats x = max(b) 
+| keep x
+;
+
+x:keyword
+Success
+;
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec
@@ -297,3 +297,34 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W
 text:integer | int:integer | description:keyword
 123          | 5           | abc
 ;
+
+avoidAttributesRemoval
+required_capability: union_types
+required_capability: join_lookup_v12
+required_capability: keep_regex_extract_attributes
+from multivalue_points,h*,messa* 
+| eval  `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false 
+| eval  PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc"
+| rename message as message 
+| lookup join message_types_lookup on message 
+| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST 
+| limit 5845 
+| drop `subset`, ip*, `card`, `within`, host.v*, description, `aecuvjTkgZza`, host.version, `ip0`, height_range, DsxMWtGL, host_group, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, `host.os`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card 
+| grok type "%{WORD:GknCxQFo}" 
+| eval  `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617 
+| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk` 
+| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}" 
+| drop HvTQdOqFajpH, `location`, centroid 
+| mv_expand GknCxQFo 
+| limit 410 
+| limit 3815 
+| rename `id` AS `GknCxQFo` 
+| grok host.name "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}" 
+| stats  BkQXJRMeAM = min(GknCxQFo) 
+| keep `BkQXJRMeAM`
+;
+
+BkQXJRMeAM:long
+4325287503714500302
+;
+
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -369,6 +369,11 @@ public enum Cap {
          */
         GROK_DISSECT_MASKING,
 
+        /**
+         * Avoid GROK and DISSECT attributes being removed when resolving fields.
+         * see <a href="https://github.com/elastic/elasticsearch/issues/127468"> ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER] #127468 </a>
+         */
+        KEEP_REGEX_EXTRACT_ATTRIBUTES,
         /**
          * Support for quoting index sources in double quotes.
          */

diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java
@@ -596,7 +596,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
         var keepJoinRefsBuilder = AttributeSet.builder();
         Set<String> wildcardJoinIndices = new java.util.HashSet<>();
 
-        boolean[] canRemoveAliases = new boolean[] { true };
+        boolean[] canRemoveAliases = new boolean[] { true, true };
 
         parsed.forEachDown(p -> {// go over each plan top-down
             if (p instanceof RegexExtract re) { // for Grok and Dissect
@@ -663,13 +663,21 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
                 // remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
                 AttributeSet planRefs = p.references();
                 Set<String> fieldNames = planRefs.names();
-                p.forEachExpressionDown(Alias.class, alias -> {
-                    // do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
-                    // or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
-                    if (fieldNames.contains(alias.name())) {
-                        return;
+                canRemoveAliases[1] = true;
+                p.forEachDown(plan -> {
+                    if (canRemoveAliases[1] && couldOverrideAliases(plan)) {
+                        canRemoveAliases[1] = false;
+                    }
+                    if (canRemoveAliases[1]) {
+                        plan.forEachExpression(Alias.class, alias -> {
+                            // do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
+                            // or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
+                            if (fieldNames.contains(alias.name())) {
+                                return;
+                            }
+                            referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
+                        });
                     }
-                    referencesBuilder.removeIf(attr -> matchByName(attr, alias.name(), keepCommandRefsBuilder.contains(attr)));
                 });
             }
         });

diff --git a/...esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/...esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java
@@ -1341,6 +1341,18 @@ public void testDissectOverwriteName() {
         assertThat(fieldNames, equalTo(Set.of("emp_no", "emp_no.*", "first_name", "first_name.*")));
     }
 
+    public void testAvoidGrokAttributesRemoval() { // "type" (set by EVAL, read by GROK) must stay in the expected names
+        Set<String> fieldNames = fieldNames("""
+            from message_types
+            | eval type = 1
+            | lookup join message_types_lookup on message
+            | drop  message
+            | grok type "%{WORD:b}"
+            | stats x = max(b)
+            | keep x""", Set.of());
+        assertThat(fieldNames, equalTo(Set.of("message", "x", "type", "x.*", "message.*", "type.*")));
+    }
+
     public void testEnrichOnDefaultField() {
         Set<String> fieldNames = fieldNames("""
             from employees