docs/changelog/127563.yaml (6 additions, 0 deletions)
@@ -0,0 +1,6 @@
pr: 127563
summary: "ESQL: Avoid regex extract attributes removal"
area: ES|QL
type: bug
issues:
- 127468
@@ -53,7 +53,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
"No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418
"Unknown column", // https://github.com/elastic/elasticsearch/issues/127467
"only supports KEYWORD or TEXT values", // https://github.com/elastic/elasticsearch/issues/127468
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
);

@@ -331,3 +331,19 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}";
a:keyword | b:keyword | d:keyword
b c d x | b | d
;

avoidAttributesRemoval
required_capability: keep_regex_extract_attributes
required_capability: join_lookup_v12
from message_types
| eval type = 1
| lookup join message_types_lookup on message
| drop message
| dissect type "%{b}"
| stats x = max(b)
| keep x
;

x:keyword
Success
;
@@ -297,3 +297,34 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W
text:integer | int:integer | description:keyword
123 | 5 | abc
;

avoidAttributesRemoval
required_capability: union_types
required_capability: join_lookup_v12
required_capability: keep_regex_extract_attributes
from multivalue_points,h*,messa*
| eval `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false
| eval PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc"
| rename message as message
| lookup join message_types_lookup on message
| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST
| limit 5845
| drop `subset`, ip*, `card`, `within`, host.v*, description, `aecuvjTkgZza`, host.version, `ip0`, height_range, DsxMWtGL, host_group, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, `host.os`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card
| grok type "%{WORD:GknCxQFo}"
| eval `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617
| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk`
| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}"
| drop HvTQdOqFajpH, `location`, centroid
| mv_expand GknCxQFo
| limit 410
| limit 3815
| rename `id` AS `GknCxQFo`
| grok host.name "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}"
| stats BkQXJRMeAM = min(GknCxQFo)
| keep `BkQXJRMeAM`
;

BkQXJRMeAM:long
4325287503714500302
;

@@ -369,6 +369,11 @@ public enum Cap {
*/
GROK_DISSECT_MASKING,

/**
 * Avoid GROK and DISSECT attributes being removed when resolving fields.
 * See <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER] #127468</a>
 */
KEEP_REGEX_EXTRACT_ATTRIBUTES,
/**
* Support for quoting index sources in double quotes.
*/
@@ -594,6 +594,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
// ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for
var keepCommandRefsBuilder = AttributeSet.builder();
var keepJoinRefsBuilder = AttributeSet.builder();
var regexExtractRefsBuilder = AttributeSet.builder();
Set<String> wildcardJoinIndices = new java.util.HashSet<>();

boolean[] canRemoveAliases = new boolean[] { true };
@@ -605,7 +606,7 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
referencesBuilder.removeIf(attr -> matchByName(attr, extracted.name(), false));
}
// but keep the inputs needed by Grok/Dissect
referencesBuilder.addAll(re.input().references());
regexExtractRefsBuilder.addAll(re.input().references());
} else if (p instanceof Enrich enrich) {
AttributeSet enrichFieldRefs = Expressions.references(enrich.enrichFields());
AttributeSet.Builder enrichRefs = enrichFieldRefs.combine(enrich.matchField().references()).asBuilder();
@@ -676,6 +677,8 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy

// Add JOIN ON column references afterward to avoid Alias removal
referencesBuilder.addAll(keepJoinRefsBuilder);
// Add the inputs needed by Grok/Dissect afterward to avoid Alias removal
referencesBuilder.addAll(regexExtractRefsBuilder);
// If any JOIN commands need wildcard field-caps calls, persist the index names
if (wildcardJoinIndices.isEmpty() == false) {
result = result.withWildcardJoinIndices(wildcardJoinIndices);
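
The two referencesBuilder.addAll(...) calls above apply the same deferred-merge pattern to GROK/DISSECT inputs that was already used for JOIN ON columns: the inputs are parked in regexExtractRefsBuilder while the plan is walked, and only merged into the main reference set after the alias-removal logic has run, so they can no longer be dropped. Below is a minimal, self-contained sketch of that pattern; the types and names (FieldNameCollector, onRegexExtract, resolvedFieldNames) are simplified stand-ins for illustration, not the actual ES|QL classes.

import java.util.HashSet;
import java.util.Set;

// Sketch only: "references" models the field names requested from field-caps,
// "regexExtractRefs" models the new regexExtractRefsBuilder in the change above.
final class FieldNameCollector {
    private final Set<String> references = new HashSet<>();
    private final Set<String> regexExtractRefs = new HashSet<>();

    // Called when a GROK/DISSECT-like node is visited while walking the plan.
    void onRegexExtract(String inputField, Set<String> extractedNames) {
        // Attributes produced by the extraction shadow real fields, so drop them
        // from the request...
        references.removeAll(extractedNames);
        // ...but park the input column in a separate set instead of adding it to
        // "references" directly, so a later alias-removal step cannot strip it.
        regexExtractRefs.add(inputField);
    }

    // Called once the whole plan has been visited.
    Set<String> resolvedFieldNames() {
        references.addAll(regexExtractRefs); // merge after alias removal is done
        return references;
    }
}

The important property is the ordering: as in the real change, the deferred set is merged only after every removal pass has completed.
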
@@ -261,13 +261,16 @@ public void testDateFields() {
}

public void testEvalDissect() {
assertFieldNames("""
from employees
| eval full_name = concat(first_name, " ", last_name)
| dissect full_name "%{a} %{b}"
| sort emp_no asc
| keep full_name, a, b
| limit 3""", Set.of("first_name", "first_name.*", "last_name", "last_name.*", "emp_no", "emp_no.*"));
assertFieldNames(
"""
from employees
| eval full_name = concat(first_name, " ", last_name)
| dissect full_name "%{a} %{b}"
| sort emp_no asc
| keep full_name, a, b
| limit 3""",
Set.of("emp_no", "first_name", "last_name", "full_name", "last_name.*", "first_name.*", "full_name.*", "emp_no.*")
);
}

public void testDissectExpression() {
Expand Down Expand Up @@ -685,13 +688,16 @@ public void testBucket() {
}

public void testEvalGrok() {
assertFieldNames("""
from employees
| eval full_name = concat(first_name, " ", last_name)
| grok full_name "%{WORD:a} %{WORD:b}"
| sort emp_no asc
| keep full_name, a, b
| limit 3""", Set.of("first_name", "first_name.*", "last_name", "last_name.*", "emp_no", "emp_no.*"));
assertFieldNames(
"""
from employees
| eval full_name = concat(first_name, " ", last_name)
| grok full_name "%{WORD:a} %{WORD:b}"
| sort emp_no asc
| keep full_name, a, b
| limit 3""",
Set.of("emp_no", "first_name", "last_name", "full_name", "last_name.*", "first_name.*", "full_name.*", "emp_no.*")
);
}

public void testGrokExpression() {
Expand All @@ -710,7 +716,7 @@ public void testEvalGrokSort() {
| grok full_name "%{WORD:a} %{WORD:b}"
| sort a asc
| keep full_name, a, b
| limit 3""", Set.of("first_name", "first_name.*", "last_name", "last_name.*"));
| limit 3""", Set.of("first_name", "last_name", "full_name", "last_name.*", "first_name.*", "full_name.*"));
}

public void testGrokStats() {
Expand All @@ -720,7 +726,7 @@ public void testGrokStats() {
| grok x "%{WORD:a} %{WORD:b}"
| stats n = max(emp_no) by a
| keep a, n
| sort a asc""", Set.of("gender", "gender.*", "emp_no", "emp_no.*"));
| sort a asc""", Set.of("emp_no", "gender", "x", "x.*", "gender.*", "emp_no.*"));
}

public void testNullOnePattern() {
Expand Down Expand Up @@ -1341,6 +1347,18 @@ public void testDissectOverwriteName() {
assertThat(fieldNames, equalTo(Set.of("emp_no", "emp_no.*", "first_name", "first_name.*")));
}

public void testAvoidGrokAttributesRemoval() {
Set<String> fieldNames = fieldNames("""
from message_types
| eval type = 1
| lookup join message_types_lookup on message
| drop message
| grok type "%{WORD:b}"
| stats x = max(b)
| keep x""", Set.of());
assertThat(fieldNames, equalTo(Set.of("message", "x", "type", "x.*", "message.*", "type.*")));
}

public void testEnrichOnDefaultField() {
Set<String> fieldNames = fieldNames("""
from employees