From 117337e6d99df7e6e57341b1987297c7e83f7b88 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 17 Jul 2025 11:44:47 +1000 Subject: [PATCH 1/7] Update regions_by_endpoint for AWS sdk upgrade. Also add test to ensure the file has at least one entry for each region so that it is easy to spot missing regions in future upgrades. Relates: #131050 --- .../repositories/s3/regions_by_endpoint.txt | 16 ++++++++++++++++ .../s3/RegionFromEndpointGuesserTests.java | 13 +++++++++++++ 2 files changed, 29 insertions(+) diff --git a/modules/repository-s3/src/main/resources/org/elasticsearch/repositories/s3/regions_by_endpoint.txt b/modules/repository-s3/src/main/resources/org/elasticsearch/repositories/s3/regions_by_endpoint.txt index 3fae5c314c10b..5ca027a5f4a13 100644 --- a/modules/repository-s3/src/main/resources/org/elasticsearch/repositories/s3/regions_by_endpoint.txt +++ b/modules/repository-s3/src/main/resources/org/elasticsearch/repositories/s3/regions_by_endpoint.txt @@ -6,6 +6,10 @@ ap-east-1 s3-fips.ap-east-1.amazonaws.com ap-east-1 s3-fips.dualstack.ap-east-1.amazonaws.com ap-east-1 s3.ap-east-1.amazonaws.com ap-east-1 s3.dualstack.ap-east-1.amazonaws.com +ap-east-2 s3-fips.ap-east-2.amazonaws.com +ap-east-2 s3-fips.dualstack.ap-east-2.amazonaws.com +ap-east-2 s3.ap-east-2.amazonaws.com +ap-east-2 s3.dualstack.ap-east-2.amazonaws.com ap-northeast-1 s3-fips.ap-northeast-1.amazonaws.com ap-northeast-1 s3-fips.dualstack.ap-northeast-1.amazonaws.com ap-northeast-1 s3.ap-northeast-1.amazonaws.com @@ -56,6 +60,14 @@ aws-iso-b-global s3-fips.aws-iso-b-global.sc2s.sgov.gov aws-iso-b-global s3-fips.dualstack.aws-iso-b-global.sc2s.sgov.gov aws-iso-b-global s3.aws-iso-b-global.sc2s.sgov.gov aws-iso-b-global s3.dualstack.aws-iso-b-global.sc2s.sgov.gov +aws-iso-e-global s3-fips.aws-iso-e-global.cloud.adc-e.uk +aws-iso-e-global s3-fips.dualstack.aws-iso-e-global.cloud.adc-e.uk +aws-iso-e-global s3.aws-iso-e-global.cloud.adc-e.uk +aws-iso-e-global s3.dualstack.aws-iso-e-global.cloud.adc-e.uk +aws-iso-f-global s3-fips.aws-iso-f-global.csp.hci.ic.gov +aws-iso-f-global s3-fips.dualstack.aws-iso-f-global.csp.hci.ic.gov +aws-iso-f-global s3.aws-iso-f-global.csp.hci.ic.gov +aws-iso-f-global s3.dualstack.aws-iso-f-global.csp.hci.ic.gov aws-iso-global s3-fips.aws-iso-global.c2s.ic.gov aws-iso-global s3-fips.dualstack.aws-iso-global.c2s.ic.gov aws-iso-global s3.aws-iso-global.c2s.ic.gov @@ -76,6 +88,10 @@ cn-north-1 s3.cn-north-1.amazonaws.com.cn cn-north-1 s3.dualstack.cn-north-1.amazonaws.com.cn cn-northwest-1 s3.cn-northwest-1.amazonaws.com.cn cn-northwest-1 s3.dualstack.cn-northwest-1.amazonaws.com.cn +eusc-de-east-1 s3-fips.eusc-de-east-1.amazonaws.eu +eusc-de-east-1 s3-fips.dualstack.eusc-de-east-1.amazonaws.eu +eusc-de-east-1 s3.eusc-de-east-1.amazonaws.eu +eusc-de-east-1 s3.dualstack.eusc-de-east-1.amazonaws.eu eu-central-1 s3-fips.dualstack.eu-central-1.amazonaws.com eu-central-1 s3-fips.eu-central-1.amazonaws.com eu-central-1 s3.dualstack.eu-central-1.amazonaws.com diff --git a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/RegionFromEndpointGuesserTests.java b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/RegionFromEndpointGuesserTests.java index 9fe0c40c83979..402181878b600 100644 --- a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/RegionFromEndpointGuesserTests.java +++ b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/RegionFromEndpointGuesserTests.java @@ -9,6 +9,11 @@ package 
org.elasticsearch.repositories.s3; +import software.amazon.awssdk.endpoints.Endpoint; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.endpoints.S3EndpointParams; +import software.amazon.awssdk.services.s3.endpoints.internal.DefaultS3EndpointProvider; + import org.elasticsearch.core.Nullable; import org.elasticsearch.test.ESTestCase; @@ -23,6 +28,14 @@ public void testRegionGuessing() { assertRegionGuess("random.endpoint.internal.net", null); } + public void testHasEntryForEachRegion() { + final var defaultS3EndpointProvider = new DefaultS3EndpointProvider(); + for (var region : Region.regions()) { + final Endpoint endpoint = safeGet(defaultS3EndpointProvider.resolveEndpoint(S3EndpointParams.builder().region(region).build())); + assertNotNull(region.id(), RegionFromEndpointGuesser.guessRegion(endpoint.url().toString())); + } + } + private static void assertRegionGuess(String endpoint, @Nullable String expectedRegion) { assertEquals(endpoint, expectedRegion, RegionFromEndpointGuesser.guessRegion(endpoint)); } From 1fad2c3e1bf08b8eabbc7db29e4b1650c1fe947f Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 17 Jul 2025 09:43:49 +0200 Subject: [PATCH 2/7] Mute org.elasticsearch.packaging.test.DockerTests test072RunEsAsDifferentUserAndGroup #131412 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 27ffc88873c70..6f920d95d4189 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -466,6 +466,9 @@ tests: - class: org.elasticsearch.xpack.downsample.DataStreamLifecycleDownsampleDisruptionIT method: testDataStreamLifecycleDownsampleRollingRestart issue: https://github.com/elastic/elasticsearch/issues/131394 +- class: org.elasticsearch.packaging.test.DockerTests + method: test072RunEsAsDifferentUserAndGroup + issue: https://github.com/elastic/elasticsearch/issues/131412 # Examples: # From aadf40c156387793fd6bf3b0e8bd234d5d836d22 Mon Sep 17 00:00:00 2001 From: kanoshiou Date: Thu, 17 Jul 2025 16:23:04 +0800 Subject: [PATCH 3/7] ESQL: Fix inconsistent column order in MV_EXPAND (#129745) The new attribute generated by MV_EXPAND should remain in the original position. The projection added by ProjectAwayColumns does not respect the original order of attributes. Make ProjectAwayColumns respect the order of attributes to fix this. 
--- docs/changelog/129745.yaml | 6 + .../esql/core/expression/AttributeSet.java | 4 + .../rest/generative/GenerativeRestTest.java | 1 - .../src/main/resources/mv_expand.csv-spec | 62 +++ .../xpack/esql/action/EsqlCapabilities.java | 6 + .../rules/physical/ProjectAwayColumns.java | 14 +- .../optimizer/PhysicalPlanOptimizerTests.java | 500 +++++++++--------- 7 files changed, 350 insertions(+), 243 deletions(-) create mode 100644 docs/changelog/129745.yaml diff --git a/docs/changelog/129745.yaml b/docs/changelog/129745.yaml new file mode 100644 index 0000000000000..35cfd0671bd64 --- /dev/null +++ b/docs/changelog/129745.yaml @@ -0,0 +1,6 @@ +pr: 129745 +summary: "ESQL: Fix `mv_expand` inconsistent column order" +area: ES|QL +type: bug +issues: + - 129000 diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/AttributeSet.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/AttributeSet.java index d281db4e6bf63..fefaf3098319e 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/AttributeSet.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/AttributeSet.java @@ -261,5 +261,9 @@ public boolean isEmpty() { public AttributeSet build() { return new AttributeSet(mapBuilder.build()); } + + public void clear() { + mapBuilder.keySet().clear(); + } } } diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java index 2fefcccb4d14f..41b8658302bd7 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java @@ -53,7 +53,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase { "Data too large", // Circuit breaker exceptions eg. 
https://github.com/elastic/elasticsearch/issues/130072 // Awaiting fixes for correctness - "Expecting the following columns \\[.*\\], got", // https://github.com/elastic/elasticsearch/issues/129000 "Expecting at most \\[.*\\] columns, got \\[.*\\]" // https://github.com/elastic/elasticsearch/issues/129561 ); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_expand.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_expand.csv-spec index 20ce3ecc5a396..c7dbe01ef6f09 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_expand.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_expand.csv-spec @@ -419,3 +419,65 @@ emp_no:integer | job_positions:keyword 10001 | Accountant 10001 | Senior Python Developer ; + +testMvExpandInconsistentColumnOrder1 +required_capability: fix_mv_expand_inconsistent_column_order +from message_types +| eval foo_1 = 1, foo_2 = 2 +| sort message +| mv_expand foo_1 +; + +message:keyword | type:keyword | foo_1:integer | foo_2:integer +Connected to 10.1.0.1 | Success | 1 | 2 +Connected to 10.1.0.2 | Success | 1 | 2 +Connected to 10.1.0.3 | Success | 1 | 2 +Connection error | Error | 1 | 2 +Development environment | Development | 1 | 2 +Disconnected | Disconnected | 1 | 2 +Production environment | Production | 1 | 2 +; + +testMvExpandInconsistentColumnOrder2 +required_capability: fix_mv_expand_inconsistent_column_order +from message_types +| eval foo_1 = [1, 3], foo_2 = 2 +| sort message +| mv_expand foo_1 +; + +message:keyword | type:keyword | foo_1:integer | foo_2:integer +Connected to 10.1.0.1 | Success | 1 | 2 +Connected to 10.1.0.1 | Success | 3 | 2 +Connected to 10.1.0.2 | Success | 1 | 2 +Connected to 10.1.0.2 | Success | 3 | 2 +Connected to 10.1.0.3 | Success | 1 | 2 +Connected to 10.1.0.3 | Success | 3 | 2 +Connection error | Error | 1 | 2 +Connection error | Error | 3 | 2 +Development environment | Development | 1 | 2 +Development environment | Development | 3 | 2 +Disconnected | Disconnected | 1 | 2 +Disconnected | Disconnected | 3 | 2 +Production environment | Production | 1 | 2 +Production environment | Production | 3 | 2 +; + +testMvExpandInconsistentColumnOrder3 +required_capability: fix_mv_expand_inconsistent_column_order +from message_types +| sort type +| eval language_code = 1, `language_name` = false, message = true, foo_3 = 1, foo_2 = null +| eval foo_3 = "1", `foo_3` = -1, foo_1 = 1, `language_code` = null, `foo_2` = "1" +| mv_expand foo_1 +| limit 5 +; + +type:keyword | language_name:boolean | message:boolean | foo_3:integer | foo_1:integer | language_code:null | foo_2:keyword +Development | false | true | -1 | 1 | null | 1 +Disconnected | false | true | -1 | 1 | null | 1 +Error | false | true | -1 | 1 | null | 1 +Production | false | true | -1 | 1 | null | 1 +Success | false | true | -1 | 1 | null | 1 +; + diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index ac75811602bd6..f4f6bd8a12d79 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1235,6 +1235,12 @@ public enum Cap { */ NO_PLAIN_STRINGS_IN_LITERALS, + /** + * Support for the mv_expand target attribute should be retained in its original position. 
+ * see ES|QL: inconsistent column order #129000 + */ + FIX_MV_EXPAND_INCONSISTENT_COLUMN_ORDER, + /** * (Re)Added EXPLAIN command */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/ProjectAwayColumns.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/ProjectAwayColumns.java index 26cfbf40eb7ff..887fb039a14cb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/ProjectAwayColumns.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/ProjectAwayColumns.java @@ -76,7 +76,19 @@ public PhysicalPlan apply(PhysicalPlan plan) { // no need for projection when dealing with aggs if (logicalFragment instanceof Aggregate == false) { - List output = new ArrayList<>(requiredAttrBuilder.build()); + // we should respect the order of the attributes + List output = new ArrayList<>(); + for (Attribute attribute : logicalFragment.output()) { + if (requiredAttrBuilder.contains(attribute)) { + output.add(attribute); + requiredAttrBuilder.remove(attribute); + } + } + // requiredAttrBuilder should be empty unless the plan is inconsistent due to a bug. + // This can happen in case of remote ENRICH, see https://github.com/elastic/elasticsearch/issues/118531 + // TODO: stop adding the remaining required attributes once remote ENRICH is fixed. + output.addAll(requiredAttrBuilder.build()); + // if all the fields are filtered out, it's only the count that matters // however until a proper fix (see https://github.com/elastic/elasticsearch/issues/98703) // add a synthetic field (so it doesn't clash with the user defined one) to return a constant diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 79d58341783aa..a609a1e494e54 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -128,6 +128,7 @@ import org.elasticsearch.xpack.esql.plan.physical.LimitExec; import org.elasticsearch.xpack.esql.plan.physical.LocalSourceExec; import org.elasticsearch.xpack.esql.plan.physical.LookupJoinExec; +import org.elasticsearch.xpack.esql.plan.physical.MvExpandExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.plan.physical.ProjectExec; import org.elasticsearch.xpack.esql.plan.physical.TopNExec; @@ -193,6 +194,7 @@ import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.containsInRelativeOrder; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; @@ -625,16 +627,16 @@ public void testTripleExtractorPerField() { } /** - * Expected - * LimitExec[10000[INTEGER]] - * \_AggregateExec[[],[AVG(salary{f}#14) AS x],FINAL] - * \_AggregateExec[[],[AVG(salary{f}#14) AS x],PARTIAL] - * \_FilterExec[ROUND(emp_no{f}#9) > 10[INTEGER]] - * \_TopNExec[[Order[last_name{f}#13,ASC,LAST]],10[INTEGER]] - * \_ExchangeExec[] - * \_ProjectExec[[salary{f}#14, first_name{f}#10, emp_no{f}#9, last_name{f}#13]] -- project away _doc - * \_FieldExtractExec[salary{f}#14, 
first_name{f}#10, emp_no{f}#9, last_n..] -- local field extraction - * \_EsQueryExec[test], query[][_doc{f}#16], limit[10], sort[[last_name]] + *LimitExec[10000[INTEGER],8] + * \_AggregateExec[[],[SUM(salary{f}#13460,true[BOOLEAN]) AS x#13454],FINAL,[$$x$sum{r}#13466, $$x$seen{r}#13467],8] + * \_AggregateExec[[],[SUM(salary{f}#13460,true[BOOLEAN]) AS x#13454],INITIAL,[$$x$sum{r}#13466, $$x$seen{r}#13467],8] + * \_FilterExec[ROUND(emp_no{f}#13455) > 10[INTEGER]] + * \_TopNExec[[Order[last_name{f}#13459,ASC,LAST]],10[INTEGER],58] + * \_ExchangeExec[[emp_no{f}#13455, last_name{f}#13459, salary{f}#13460],false] + * \_ProjectExec[[emp_no{f}#13455, last_name{f}#13459, salary{f}#13460]] -- project away _doc + * \_FieldExtractExec[emp_no{f}#13455, last_name{f}#13459, salary{f}#1346..] <[],[]> -- local field extraction + * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#13482], limit[10], + * sort[[FieldSort[field=last_name{f}#13459, direction=ASC, nulls=LAST]]] estimatedRowSize[74] */ public void testExtractorForField() { var plan = physicalPlan(""" @@ -658,7 +660,7 @@ public void testExtractorForField() { var exchange = asRemoteExchange(topN.child()); var project = as(exchange.child(), ProjectExec.class); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("salary", "emp_no", "last_name")); + assertThat(names(extract.attributesToExtract()), contains("emp_no", "last_name", "salary")); var source = source(extract.child()); assertThat(source.limit(), is(topN.limit())); assertThat(source.sorts(), is(fieldSorts(topN.order()))); @@ -2219,7 +2221,7 @@ public void testNoPushDownChangeCase() { * ages{f}#6, last_name{f}#7, long_noidx{f}#13, salary{f}#8],false] * \_ProjectExec[[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gender{f}#5, hire_date{f}#10, job{f}#11, job.raw{f}#12, langu * ages{f}#6, last_name{f}#7, long_noidx{f}#13, salary{f}#8]] - * \_FieldExtractExec[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gen..]<[],[]> + * \_FieldExtractExec[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gen..]<[],[]> * \_EsQueryExec[test], indexMode[standard], query[{"esql_single_value":{"field":"first_name","next":{"regexp":{"first_name": * {"value":"foo*","flags_value":65791,"case_insensitive":true,"max_determinized_states":10000,"boost":0.0}}}, * "source":"TO_LOWER(first_name) RLIKE \"foo*\"@2:9"}}][_doc{f}#25], limit[1000], sort[] estimatedRowSize[332] @@ -2340,10 +2342,10 @@ public void testPushDownUpperCaseChangeLike() { * uages{f}#7, last_name{f}#8, long_noidx{f}#14, salary{f}#9],false] * \_ProjectExec[[_meta_field{f}#10, emp_no{f}#4, first_name{f}#5, gender{f}#6, hire_date{f}#11, job{f}#12, job.raw{f}#13, lang * uages{f}#7, last_name{f}#8, long_noidx{f}#14, salary{f}#9]] - * \_FieldExtractExec[_meta_field{f}#10, gender{f}#6, hire_date{f}#11, jo..]<[],[]> + * \_FieldExtractExec[_meta_field{f}#10, gender{f}#6, hire_date{f}#11, jo..]<[],[]> * \_LimitExec[1000[INTEGER]] * \_FilterExec[LIKE(first_name{f}#5, "FOO*", true) OR IN(1[INTEGER],2[INTEGER],3[INTEGER],emp_no{f}#4 + 1[INTEGER])] - * \_FieldExtractExec[first_name{f}#5, emp_no{f}#4]<[],[]> + * \_FieldExtractExec[first_name{f}#5, emp_no{f}#4]<[],[]> * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#26], limit[], sort[] estimatedRowSize[332] */ public void testChangeCaseAsInsensitiveWildcardLikeNotPushedDown() { @@ -2458,22 +2460,17 @@ public void testPushDownEvalFilter() { /** * - * ProjectExec[[last_name{f}#21 AS name, first_name{f}#18 AS last_name, last_name{f}#21 AS 
first_name]] - * \_TopNExec[[Order[last_name{f}#21,ASC,LAST]],10[INTEGER],0] - * \_ExchangeExec[[last_name{f}#21, first_name{f}#18],false] - * \_ProjectExec[[last_name{f}#21, first_name{f}#18]] - * \_FieldExtractExec[last_name{f}#21, first_name{f}#18][] - * \_EsQueryExec[test], indexMode[standard], query[{ - * "bool":{"must":[ - * {"esql_single_value":{ - * "field":"last_name", - * "next":{"range":{"last_name":{"gt":"B","boost":1.0}}}, - * "source":"first_name > \"B\"@3:9" - * }}, - * {"exists":{"field":"first_name","boost":1.0}} - * ],"boost":1.0}}][_doc{f}#40], limit[10], sort[[ - * FieldSort[field=last_name{f}#21, direction=ASC, nulls=LAST] - * ]] estimatedRowSize[116] + * ProjectExec[[last_name{f}#13858 AS name#13841, first_name{f}#13855 AS last_name#13844, last_name{f}#13858 AS first_name#13 + * 847]] + * \_TopNExec[[Order[last_name{f}#13858,ASC,LAST]],10[INTEGER],100] + * \_ExchangeExec[[first_name{f}#13855, last_name{f}#13858],false] + * \_ProjectExec[[first_name{f}#13855, last_name{f}#13858]] + * \_FieldExtractExec[first_name{f}#13855, last_name{f}#13858]<[],[]> + * \_EsQueryExec[test], indexMode[standard], query[ + * {"bool":{"must":[{"esql_single_value":{"field":"last_name","next": + * {"range":{"last_name":{"gt":"B","boost":0.0}}},"source":"first_name > \"B\"@3:9"}}, + * {"exists":{"field":"first_name","boost":0.0}}],"boost":1.0}} + * ][_doc{f}#13879], limit[10], sort[[FieldSort[field=last_name{f}#13858, direction=ASC, nulls=LAST]]] estimatedRowSize[116] * */ public void testPushDownEvalSwapFilter() { @@ -2494,7 +2491,7 @@ public void testPushDownEvalSwapFilter() { var extract = as(project.child(), FieldExtractExec.class); assertThat( extract.attributesToExtract().stream().map(Attribute::name).collect(Collectors.toList()), - contains("last_name", "first_name") + contains("first_name", "last_name") ); // Now verify the correct Lucene push-down of both the filter and the sort @@ -2607,7 +2604,7 @@ public void testDissect() { * uages{f}#7, last_name{f}#8, long_noidx{f}#14, salary{f}#9, _index{m}#2],false] * \_ProjectExec[[_meta_field{f}#10, emp_no{f}#4, first_name{f}#5, gender{f}#6, hire_date{f}#11, job{f}#12, job.raw{f}#13, lang * uages{f}#7, last_name{f}#8, long_noidx{f}#14, salary{f}#9, _index{m}#2]] - * \_FieldExtractExec[_meta_field{f}#10, emp_no{f}#4, first_name{f}#5, ge..]<[],[]> + * \_FieldExtractExec[_meta_field{f}#10, emp_no{f}#4, first_name{f}#5, ge..]<[],[]> * \_EsQueryExec[test], indexMode[standard], query[{"wildcard":{"_index":{"wildcard":"test*","boost":0.0}}}][_doc{f}#27], * limit[1000], sort[] estimatedRowSize[382] * @@ -3176,6 +3173,56 @@ public void testProjectAwayAllColumnsWhenOnlyTheCountMattersInStats() { assertThat(Expressions.names(esQuery.attrs()), contains("_doc")); } + /** + * LimitExec[1000[INTEGER],336] + * \_MvExpandExec[foo_1{r}#4236,foo_1{r}#4253] + * \_TopNExec[[Order[emp_no{f}#4242,ASC,LAST]],1000[INTEGER],336] + * \_ExchangeExec[[_meta_field{f}#4248, emp_no{f}#4242, first_name{f}#4243, gender{f}#4244, hire_date{f}#4249, job{f}#4250, job. + * raw{f}#4251, languages{f}#4245, last_name{f}#4246, long_noidx{f}#4252, salary{f}#4247, foo_1{r}#4236, foo_2{r}#4238], + * false] + * \_ProjectExec[[_meta_field{f}#4248, emp_no{f}#4242, first_name{f}#4243, gender{f}#4244, hire_date{f}#4249, job{f}#4250, job. 
+ * raw{f}#4251, languages{f}#4245, last_name{f}#4246, long_noidx{f}#4252, salary{f}#4247, foo_1{r}#4236, foo_2{r}#4238]] + * \_FieldExtractExec[_meta_field{f}#4248, emp_no{f}#4242, first_name{f}#..]<[],[]> + * \_EvalExec[[1[INTEGER] AS foo_1#4236, 1[INTEGER] AS foo_2#4238]] + * \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#4268], limit[1000], sort[[FieldSort[field=emp_no{f}#4242, + * direction=ASC, nulls=LAST]]] estimatedRowSize[352] + */ + public void testProjectAwayMvExpandColumnOrder() { + var plan = optimizedPlan(physicalPlan(""" + from test + | eval foo_1 = 1, foo_2 = 1 + | sort emp_no + | mv_expand foo_1 + """)); + var limit = as(plan, LimitExec.class); + var mvExpand = as(limit.child(), MvExpandExec.class); + var topN = as(mvExpand.child(), TopNExec.class); + var exchange = as(topN.child(), ExchangeExec.class); + var project = as(exchange.child(), ProjectExec.class); + + assertThat( + Expressions.names(project.projections()), + containsInRelativeOrder( + "_meta_field", + "emp_no", + "first_name", + "gender", + "hire_date", + "job", + "job.raw", + "languages", + "last_name", + "long_noidx", + "salary", + "foo_1", + "foo_2" + ) + ); + var fieldExtract = as(project.child(), FieldExtractExec.class); + var eval = as(fieldExtract.child(), EvalExec.class); + EsQueryExec esQuery = as(eval.child(), EsQueryExec.class); + } + /** * ProjectExec[[a{r}#5]] * \_EvalExec[[__a_SUM@81823521{r}#15 / __a_COUNT@31645621{r}#16 AS a]] @@ -5674,16 +5721,15 @@ public void testPushTopNWithFilterToSource() { } /** - * ProjectExec[[abbrev{f}#12321, name{f}#12322, location{f}#12325, country{f}#12326, city{f}#12327]] - * \_TopNExec[[Order[abbrev{f}#12321,ASC,LAST]],5[INTEGER],0] - * \_ExchangeExec[[abbrev{f}#12321, name{f}#12322, location{f}#12325, country{f}#12326, city{f}#12327],false] - * \_ProjectExec[[abbrev{f}#12321, name{f}#12322, location{f}#12325, country{f}#12326, city{f}#12327]] - * \_FieldExtractExec[abbrev{f}#12321, name{f}#12322, location{f}#12325, ..][] + * ProjectExec[[abbrev{f}#4474, name{f}#4475, location{f}#4478, country{f}#4479, city{f}#4480]] + * \_TopNExec[[Order[abbrev{f}#4474,ASC,LAST]],5[INTEGER],221] + * \_ExchangeExec[[abbrev{f}#4474, city{f}#4480, country{f}#4479, location{f}#4478, name{f}#4475],false] + * \_ProjectExec[[abbrev{f}#4474, city{f}#4480, country{f}#4479, location{f}#4478, name{f}#4475]] + * \_FieldExtractExec[abbrev{f}#4474, city{f}#4480, country{f}#4479, loca..]<[],[]> * \_EsQueryExec[airports], - * indexMode[standard], - * query[][_doc{f}#12337], - * limit[5], - * sort[[FieldSort[field=abbrev{f}#12321, direction=ASC, nulls=LAST]]] estimatedRowSize[237] + * indexMode[standard], + * query[][_doc{f}#4490], + * limit[5], sort[[FieldSort[field=abbrev{f}#4474, direction=ASC, nulls=LAST]]] estimatedRowSize[237] */ public void testPushTopNKeywordToSource() { var optimized = optimizedPlan(physicalPlan(""" @@ -5698,9 +5744,9 @@ public void testPushTopNKeywordToSource() { var exchange = asRemoteExchange(topN.child()); project = as(exchange.child(), ProjectExec.class); - assertThat(names(project.projections()), contains("abbrev", "name", "location", "country", "city")); + assertThat(names(project.projections()), contains("abbrev", "city", "country", "location", "name")); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "location", "country", "city")); + assertThat(names(extract.attributesToExtract()), contains("abbrev", "city", "country", "location", "name")); var source = 
source(extract.child()); assertThat(source.limit(), is(topN.limit())); assertThat(source.sorts(), is(fieldSorts(topN.order()))); @@ -5716,13 +5762,13 @@ public void testPushTopNKeywordToSource() { /** * - * ProjectExec[[abbrev{f}#12, name{f}#13, location{f}#16, country{f}#17, city{f}#18, abbrev{f}#12 AS code]] - * \_TopNExec[[Order[abbrev{f}#12,ASC,LAST]],5[INTEGER],0] - * \_ExchangeExec[[abbrev{f}#12, name{f}#13, location{f}#16, country{f}#17, city{f}#18],false] - * \_ProjectExec[[abbrev{f}#12, name{f}#13, location{f}#16, country{f}#17, city{f}#18]] - * \_FieldExtractExec[abbrev{f}#12, name{f}#13, location{f}#16, country{f..][] - * \_EsQueryExec[airports], indexMode[standard], query[][_doc{f}#29], limit[5], - * sort[[FieldSort[field=abbrev{f}#12, direction=ASC, nulls=LAST]]] estimatedRowSize[237] + * ProjectExec[[abbrev{f}#7828, name{f}#7829, location{f}#7832, country{f}#7833, city{f}#7834, abbrev{f}#7828 AS code#7820]] + * \_TopNExec[[Order[abbrev{f}#7828,ASC,LAST]],5[INTEGER],221] + * \_ExchangeExec[[abbrev{f}#7828, city{f}#7834, country{f}#7833, location{f}#7832, name{f}#7829],false] + * \_ProjectExec[[abbrev{f}#7828, city{f}#7834, country{f}#7833, location{f}#7832, name{f}#7829]] + * \_FieldExtractExec[abbrev{f}#7828, city{f}#7834, country{f}#7833, loca..]<[],[]> + * \_EsQueryExec[airports], indexMode[standard], query[][_doc{f}#7845], limit[5], + * sort[[FieldSort[field=abbrev{f}#7828, direction=ASC, nulls=LAST]]] estimatedRowSize[237] * */ public void testPushTopNAliasedKeywordToSource() { @@ -5740,9 +5786,9 @@ public void testPushTopNAliasedKeywordToSource() { var exchange = asRemoteExchange(topN.child()); project = as(exchange.child(), ProjectExec.class); - assertThat(names(project.projections()), contains("abbrev", "name", "location", "country", "city")); + assertThat(names(project.projections()), contains("abbrev", "city", "country", "location", "name")); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "location", "country", "city")); + assertThat(names(extract.attributesToExtract()), contains("abbrev", "city", "country", "location", "name")); var source = source(extract.child()); assertThat(source.limit(), is(topN.limit())); assertThat(source.sorts(), is(fieldSorts(topN.order()))); @@ -5757,19 +5803,19 @@ public void testPushTopNAliasedKeywordToSource() { } /** - * ProjectExec[[abbrev{f}#11, name{f}#12, location{f}#15, country{f}#16, city{f}#17]] - * \_TopNExec[[Order[distance{r}#4,ASC,LAST]],5[INTEGER],0] - * \_ExchangeExec[[abbrev{f}#11, name{f}#12, location{f}#15, country{f}#16, city{f}#17, distance{r}#4],false] - * \_ProjectExec[[abbrev{f}#11, name{f}#12, location{f}#15, country{f}#16, city{f}#17, distance{r}#4]] - * \_FieldExtractExec[abbrev{f}#11, name{f}#12, country{f}#16, city{f}#17][] - * \_EvalExec[[STDISTANCE(location{f}#15,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) - * AS distance]] - * \_FieldExtractExec[location{f}#15][] + * ProjectExec[[abbrev{f}#7283, name{f}#7284, location{f}#7287, country{f}#7288, city{f}#7289]] + * \_TopNExec[[Order[distance{r}#7276,ASC,LAST]],5[INTEGER],229] + * \_ExchangeExec[[abbrev{f}#7283, city{f}#7289, country{f}#7288, location{f}#7287, name{f}#7284, distance{r}#7276],false] + * \_ProjectExec[[abbrev{f}#7283, city{f}#7289, country{f}#7288, location{f}#7287, name{f}#7284, distance{r}#7276]] + * \_FieldExtractExec[abbrev{f}#7283, city{f}#7289, country{f}#7288, name..]<[],[]> + * \_EvalExec[[STDISTANCE(location{f}#7287,[1 1 
0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distan + * ce#7276]] + * \_FieldExtractExec[location{f}#7287]<[],[]> * \_EsQueryExec[airports], - * indexMode[standard], - * query[][_doc{f}#28], - * limit[5], - * sort[[GeoDistanceSort[field=location{f}#15, direction=ASC, lat=55.673, lon=12.565]]] estimatedRowSize[245] + * indexMode[standard], + * query[][_doc{f}#7300], + * limit[5], + * sort[[GeoDistanceSort[field=location{f}#7287, direction=ASC, lat=55.673, lon=12.565]]] estimatedRowSize[245] */ public void testPushTopNDistanceToSource() { var optimized = optimizedPlan(physicalPlan(""" @@ -5785,9 +5831,9 @@ public void testPushTopNDistanceToSource() { var exchange = asRemoteExchange(topN.child()); project = as(exchange.child(), ProjectExec.class); - assertThat(names(project.projections()), contains("abbrev", "name", "location", "country", "city", "distance")); + assertThat(names(project.projections()), contains("abbrev", "city", "country", "location", "name", "distance")); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city")); + assertThat(names(extract.attributesToExtract()), contains("abbrev", "city", "country", "name")); var evalExec = as(extract.child(), EvalExec.class); var alias = as(evalExec.fields().get(0), Alias.class); assertThat(alias.name(), is("distance")); @@ -5814,20 +5860,19 @@ public void testPushTopNDistanceToSource() { } /** - * ProjectExec[[abbrev{f}#8, name{f}#9, location{f}#12, country{f}#13, city{f}#14]] - * \_TopNExec[[Order[$$order_by$0$0{r}#16,ASC,LAST]],5[INTEGER],0] - * \_ExchangeExec[[abbrev{f}#8, name{f}#9, location{f}#12, country{f}#13, city{f}#14, $$order_by$0$0{r}#16],false] - * \_ProjectExec[[abbrev{f}#8, name{f}#9, location{f}#12, country{f}#13, city{f}#14, $$order_by$0$0{r}#16]] - * \_FieldExtractExec[abbrev{f}#8, name{f}#9, country{f}#13, city{f}#14][] - * \_EvalExec[[ - * STDISTANCE(location{f}#12,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS $$order_by$0$0 - * ]] - * \_FieldExtractExec[location{f}#12][] + *ProjectExec[[abbrev{f}#5258, name{f}#5259, location{f}#5262, country{f}#5263, city{f}#5264]] + * \_TopNExec[[Order[$$order_by$0$0{r}#5266,ASC,LAST]],5[INTEGER],229] + * \_ExchangeExec[[abbrev{f}#5258, city{f}#5264, country{f}#5263, location{f}#5262, name{f}#5259, $$order_by$0$0{r}#5266],false] + * \_ProjectExec[[abbrev{f}#5258, city{f}#5264, country{f}#5263, location{f}#5262, name{f}#5259, $$order_by$0$0{r}#5266]] + * \_FieldExtractExec[abbrev{f}#5258, city{f}#5264, country{f}#5263, name..]<[],[]> + * \_EvalExec[[STDISTANCE(location{f}#5262,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS $$orde + * r_by$0$0#5266]] + * \_FieldExtractExec[location{f}#5262]<[],[]> * \_EsQueryExec[airports], - * indexMode[standard], - * query[][_doc{f}#26], - * limit[5], - * sort[[GeoDistanceSort[field=location{f}#12, direction=ASC, lat=55.673, lon=12.565]]] estimatedRowSize[245] + * indexMode[standard], + * query[][_doc{f}#5276], + * limit[5], + * sort[[GeoDistanceSort[field=location{f}#5262, direction=ASC, lat=55.673, lon=12.565]]] estimatedRowSize[245] */ public void testPushTopNInlineDistanceToSource() { var optimized = optimizedPlan(physicalPlan(""" @@ -5847,15 +5892,15 @@ public void testPushTopNInlineDistanceToSource() { names(project.projections()), contains( equalTo("abbrev"), - equalTo("name"), - equalTo("location"), - equalTo("country"), equalTo("city"), + equalTo("country"), + 
equalTo("location"), + equalTo("name"), startsWith("$$order_by$0$") ) ); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city")); + assertThat(names(extract.attributesToExtract()), contains("abbrev", "city", "country", "name")); var evalExec = as(extract.child(), EvalExec.class); var alias = as(evalExec.fields().get(0), Alias.class); assertThat(alias.name(), startsWith("$$order_by$0$")); @@ -5884,14 +5929,14 @@ public void testPushTopNInlineDistanceToSource() { /** * - * ProjectExec[[abbrev{f}#12, name{f}#13, location{f}#16, country{f}#17, city{f}#18]] - * \_TopNExec[[Order[distance{r}#4,ASC,LAST]],5[INTEGER],0] - * \_ExchangeExec[[abbrev{f}#12, name{f}#13, location{f}#16, country{f}#17, city{f}#18, distance{r}#4],false] - * \_ProjectExec[[abbrev{f}#12, name{f}#13, location{f}#16, country{f}#17, city{f}#18, distance{r}#4]] - * \_FieldExtractExec[abbrev{f}#12, name{f}#13, country{f}#17, city{f}#18][] - * \_EvalExec[[STDISTANCE(location{f}#16,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distance - * ]] - * \_FieldExtractExec[location{f}#16][] + * ProjectExec[[abbrev{f}#361, name{f}#362, location{f}#365, country{f}#366, city{f}#367]] + * \_TopNExec[[Order[distance{r}#353,ASC,LAST]],5[INTEGER],229] + * \_ExchangeExec[[abbrev{f}#361, city{f}#367, country{f}#366, location{f}#365, name{f}#362, distance{r}#353],false] + * \_ProjectExec[[abbrev{f}#361, city{f}#367, country{f}#366, location{f}#365, name{f}#362, distance{r}#353]] + * \_FieldExtractExec[abbrev{f}#361, city{f}#367, country{f}#366, name{f}..]<[],[]> + * \_EvalExec[[STDISTANCE(location{f}#365,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distanc + * e#353]] + * \_FieldExtractExec[location{f}#365]<[],[]> * \_EsQueryExec[airports], indexMode[standard], query[ * { * "geo_shape":{ @@ -5904,7 +5949,7 @@ public void testPushTopNInlineDistanceToSource() { * } * } * } - * }][_doc{f}#29], limit[5], sort[[GeoDistanceSort[field=location{f}#16, direction=ASC, lat=55.673, lon=12.565]]] estimatedRowSize[245] + * ][_doc{f}#378], limit[5], sort[[GeoDistanceSort[field=location{f}#365, direction=ASC, lat=55.673, lon=12.565]]] estimatedRowSize[245] * */ public void testPushTopNDistanceWithFilterToSource() { @@ -5922,9 +5967,9 @@ public void testPushTopNDistanceWithFilterToSource() { var exchange = asRemoteExchange(topN.child()); project = as(exchange.child(), ProjectExec.class); - assertThat(names(project.projections()), contains("abbrev", "name", "location", "country", "city", "distance")); + assertThat(names(project.projections()), contains("abbrev", "city", "country", "location", "name", "distance")); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city")); + assertThat(names(extract.attributesToExtract()), contains("abbrev", "city", "country", "name")); var evalExec = as(extract.child(), EvalExec.class); var alias = as(evalExec.fields().get(0), Alias.class); assertThat(alias.name(), is("distance")); @@ -5960,48 +6005,25 @@ public void testPushTopNDistanceWithFilterToSource() { /** * - * ProjectExec[[abbrev{f}#14, name{f}#15, location{f}#18, country{f}#19, city{f}#20]] - * \_TopNExec[[Order[distance{r}#4,ASC,LAST]],5[INTEGER],0] - * \_ExchangeExec[[abbrev{f}#14, name{f}#15, location{f}#18, country{f}#19, city{f}#20, distance{r}#4],false] - * \_ProjectExec[[abbrev{f}#14, name{f}#15, location{f}#18, 
country{f}#19, city{f}#20, distance{r}#4]] - * \_FieldExtractExec[abbrev{f}#14, name{f}#15, country{f}#19, city{f}#20][] - * \_EvalExec[[STDISTANCE(location{f}#18,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) - * AS distance]] - * \_FieldExtractExec[location{f}#18][] - * \_EsQueryExec[airports], indexMode[standard], query[{ - * "bool":{ - * "filter":[ - * { - * "esql_single_value":{ - * "field":"scalerank", - * "next":{"range":{"scalerank":{"lt":6,"boost":1.0}}}, - * "source":"scalerank lt 6@3:31" - * } - * }, - * { - * "bool":{ - * "must":[ - * {"geo_shape":{ - * "location":{ - * "relation":"INTERSECTS", - * "shape":{"type":"Circle","radius":"499999.99999999994m","coordinates":[12.565,55.673]} - * } - * }}, - * {"geo_shape":{ - * "location":{ - * "relation":"DISJOINT", - * "shape":{"type":"Circle","radius":"10000.000000000002m","coordinates":[12.565,55.673]} - * } - * }} - * ], - * "boost":1.0 - * } - * } - * ], - * "boost":1.0 - * }}][_doc{f}#31], limit[5], sort[[ - * GeoDistanceSort[field=location{f}#18, direction=ASC, lat=55.673, lon=12.565] - * ]] estimatedRowSize[245] + * ProjectExec[[abbrev{f}#6367, name{f}#6368, location{f}#6371, country{f}#6372, city{f}#6373]] + * \_TopNExec[[Order[distance{r}#6357,ASC,LAST]],5[INTEGER],229] + * \_ExchangeExec[[abbrev{f}#6367, city{f}#6373, country{f}#6372, location{f}#6371, name{f}#6368, distance{r}#6357],false] + * \_ProjectExec[[abbrev{f}#6367, city{f}#6373, country{f}#6372, location{f}#6371, name{f}#6368, distance{r}#6357]] + * \_FieldExtractExec[abbrev{f}#6367, city{f}#6373, country{f}#6372, name..]<[],[]> + * \_EvalExec[[STDISTANCE(location{f}#6371,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distan + * ce#6357]] + * \_FieldExtractExec[location{f}#6371]<[],[]> + * \_EsQueryExec[airports], indexMode[standard], query[ + * {"bool":{"filter":[{"esql_single_value":{"field":"scalerank","next":{"range": + * {"scalerank":{"lt":6,"boost":0.0}}},"source":"scalerank < 6@3:31"}}, + * {"bool":{"must":[{"geo_shape": + * {"location":{"relation":"INTERSECTS","shape": + * {"type":"Circle","radius":"499999.99999999994m","coordinates":[12.565,55.673]}}}}, + * {"geo_shape":{"location":{"relation":"DISJOINT","shape": + * {"type":"Circle","radius":"10000.000000000002m","coordinates":[12.565,55.673]}}}}] + * ,"boost":1.0}}],"boost":1.0}} + * ][_doc{f}#6384], limit[5], sort[ + * [GeoDistanceSort[field=location{f}#6371, direction=ASC, lat=55.673, lon=12.565]]] estimatedRowSize[245] * */ public void testPushTopNDistanceWithCompoundFilterToSource() { @@ -6019,9 +6041,9 @@ public void testPushTopNDistanceWithCompoundFilterToSource() { var exchange = asRemoteExchange(topN.child()); project = as(exchange.child(), ProjectExec.class); - assertThat(names(project.projections()), contains("abbrev", "name", "location", "country", "city", "distance")); + assertThat(names(project.projections()), contains("abbrev", "city", "country", "location", "name", "distance")); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city")); + assertThat(names(extract.attributesToExtract()), contains("abbrev", "city", "country", "name")); var evalExec = as(extract.child(), EvalExec.class); var alias = as(evalExec.fields().get(0), Alias.class); assertThat(alias.name(), is("distance")); @@ -6059,35 +6081,28 @@ public void testPushTopNDistanceWithCompoundFilterToSource() { /** * Tests that multiple sorts, including distance and a field, are pushed 
down to the source. * - * ProjectExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7]] - * \_TopNExec[[ - * Order[distance{r}#4,ASC,LAST], - * Order[scalerank{f}#27,ASC,LAST], - * Order[scale{r}#7,DESC,FIRST], - * Order[loc{r}#10,DESC,FIRST] - * ],5[INTEGER],0] - * \_ExchangeExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7, - * distance{r}#4, loc{r}#10],false] - * \_ProjectExec[[abbrev{f}#25, name{f}#26, location{f}#29, country{f}#30, city{f}#31, scalerank{f}#27, scale{r}#7, - * distance{r}#4, loc{r}#10]] - * \_FieldExtractExec[abbrev{f}#25, name{f}#26, country{f}#30, city{f}#31][] - * \_EvalExec[[ - * STDISTANCE(location{f}#29,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distance, - * 10[INTEGER] - scalerank{f}#27 AS scale, TOSTRING(location{f}#29) AS loc - * ]] - * \_FieldExtractExec[location{f}#29, scalerank{f}#27][] - * \_EsQueryExec[airports], indexMode[standard], query[{ - * "bool":{ - * "filter":[ - * {"esql_single_value":{"field":"scalerank","next":{...},"source":"scalerank < 6@3:31"}}, - * {"bool":{ - * "must":[ - * {"geo_shape":{"location":{"relation":"INTERSECTS","shape":{...}}}}, - * {"geo_shape":{"location":{"relation":"DISJOINT","shape":{...}}}} - * ],"boost":1.0}}],"boost":1.0}}][_doc{f}#44], limit[5], sort[[ - * GeoDistanceSort[field=location{f}#29, direction=ASC, lat=55.673, lon=12.565], - * FieldSort[field=scalerank{f}#27, direction=ASC, nulls=LAST] - * ]] estimatedRowSize[303] + * ProjectExec[[abbrev{f}#7429, name{f}#7430, location{f}#7433, country{f}#7434, city{f}#7435, scalerank{f}#7431, scale{r}#74 + * 11]] + * \_TopNExec[[Order[distance{r}#7408,ASC,LAST], Order[scalerank{f}#7431,ASC,LAST], Order[scale{r}#7411,DESC,FIRST], Order[l + * oc{r}#7414,DESC,FIRST]],5[INTEGER],287] + * \_ExchangeExec[[abbrev{f}#7429, city{f}#7435, country{f}#7434, location{f}#7433, name{f}#7430, scalerank{f}#7431, distance{r} + * #7408, scale{r}#7411, loc{r}#7414],false] + * \_ProjectExec[[abbrev{f}#7429, city{f}#7435, country{f}#7434, location{f}#7433, name{f}#7430, scalerank{f}#7431, distance{r} + * #7408, scale{r}#7411, loc{r}#7414]] + * \_FieldExtractExec[abbrev{f}#7429, city{f}#7435, country{f}#7434, name..]<[],[]> + * \_EvalExec[[STDISTANCE(location{f}#7433,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distan + * ce#7408, 10[INTEGER] - scalerank{f}#7431 AS scale#7411, TOSTRING(location{f}#7433) AS loc#7414]] + * \_FieldExtractExec[location{f}#7433, scalerank{f}#7431]<[],[]> + * \_EsQueryExec[airports], indexMode[standard], query[ + * {"bool":{"filter":[{"esql_single_value":{"field":"scalerank","next": + * {"range":{"scalerank":{"lt":6,"boost":0.0}}},"source":"scalerank < 6@3:31"}}, + * {"bool":{"must":[{"geo_shape":{"location":{"relation":"INTERSECTS","shape": + * {"type":"Circle","radius":"499999.99999999994m","coordinates":[12.565,55.673]}}}}, + * {"geo_shape":{"location":{"relation":"DISJOINT","shape": + * {"type":"Circle","radius":"10000.000000000002m","coordinates":[12.565,55.673]}}}}], + * "boost":1.0}}],"boost":1.0}}][_doc{f}#7448], limit[5], sort[ + * [GeoDistanceSort[field=location{f}#7433, direction=ASC, lat=55.673, lon=12.565], + * FieldSort[field=scalerank{f}#7431, direction=ASC, nulls=LAST]]] estimatedRowSize[303] * */ public void testPushTopNDistanceAndPushableFieldWithCompoundFilterToSource() { @@ -6108,10 +6123,10 @@ public void testPushTopNDistanceAndPushableFieldWithCompoundFilterToSource() { project = 
as(exchange.child(), ProjectExec.class); assertThat( names(project.projections()), - contains("abbrev", "name", "location", "country", "city", "scalerank", "scale", "distance", "loc") + contains("abbrev", "city", "country", "location", "name", "scalerank", "distance", "scale", "loc") ); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city")); + assertThat(names(extract.attributesToExtract()), contains("abbrev", "city", "country", "name")); var evalExec = as(extract.child(), EvalExec.class); var alias = as(evalExec.fields().get(0), Alias.class); assertThat(alias.name(), is("distance")); @@ -6153,26 +6168,30 @@ public void testPushTopNDistanceAndPushableFieldWithCompoundFilterToSource() { /** * This test shows that if the filter contains a predicate on the same field that is sorted, we cannot push down the sort. * - * ProjectExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scalerank{f}#25 AS scale]] - * \_TopNExec[[Order[distance{r}#4,ASC,LAST], Order[scalerank{f}#25,ASC,LAST]],5[INTEGER],0] - * \_ExchangeExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scalerank{f}#25, distance{r}#4],false] - * \_ProjectExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scalerank{f}#25, distance{r}#4]] - * \_FieldExtractExec[abbrev{f}#23, name{f}#24, country{f}#28, city{f}#29][] - * \_TopNExec[[Order[distance{r}#4,ASC,LAST], Order[scalerank{f}#25,ASC,LAST]],5[INTEGER],208] - * \_FieldExtractExec[scalerank{f}#25][] - * \_FilterExec[SUBSTRING(position{r}#7,1[INTEGER],5[INTEGER]) == [50 4f 49 4e 54][KEYWORD]] - * \_EvalExec[[ - * STDISTANCE(location{f}#27,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distance, - * TOSTRING(location{f}#27) AS position - * ]] - * \_FieldExtractExec[location{f}#27][] - * \_EsQueryExec[airports], indexMode[standard], query[{ - * "bool":{"filter":[ - * {"esql_single_value":{"field":"scalerank","next":{"range":{"scalerank":{"lt":6,"boost":1.0}}},"source":...}}, - * {"bool":{"must":[ - * {"geo_shape":{"location":{"relation":"INTERSECTS","shape":{...}}}}, - * {"geo_shape":{"location":{"relation":"DISJOINT","shape":{...}}}} - * ],"boost":1.0}}],"boost":1.0}}][_doc{f}#42], limit[], sort[] estimatedRowSize[87] + * ProjectExec[[abbrev{f}#4856, name{f}#4857, location{f}#4860, country{f}#4861, city{f}#4862, scalerank{f}#4858 AS scale#484 + * 3]] + * \_TopNExec[[Order[distance{r}#4837,ASC,LAST], Order[scalerank{f}#4858,ASC,LAST]],5[INTEGER],233] + * \_ExchangeExec[[abbrev{f}#4856, city{f}#4862, country{f}#4861, location{f}#4860, name{f}#4857, scalerank{f}#4858, distance{r} + * #4837],false] + * \_ProjectExec[[abbrev{f}#4856, city{f}#4862, country{f}#4861, location{f}#4860, name{f}#4857, scalerank{f}#4858, distance{r} + * #4837]] + * \_FieldExtractExec[abbrev{f}#4856, city{f}#4862, country{f}#4861, name..]<[],[]> + * \_TopNExec[[Order[distance{r}#4837,ASC,LAST], Order[scalerank{f}#4858,ASC,LAST]],5[INTEGER],303] + * \_FieldExtractExec[scalerank{f}#4858]<[],[]> + * \_FilterExec[SUBSTRING(position{r}#4840,1[INTEGER],5[INTEGER]) == POINT[KEYWORD]] + * \_EvalExec[[STDISTANCE(location{f}#4860,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS + * distance#4837, TOSTRING(location{f}#4860) AS position#4840]] + * \_FieldExtractExec[location{f}#4860]<[],[]> + * \_EsQueryExec[airports], indexMode[standard], query[ + * {"bool":{"filter":[ + * {"esql_single_value": + * 
{"field":"scalerank","next":{"range":{"scalerank":{"lt":6,"boost":0.0}}},"source":"scale < 6@3:93"}}, + * {"bool":{"must":[ + * {"geo_shape":{"location":{"relation":"INTERSECTS","shape": + * {"type":"Circle","radius":"499999.99999999994m","coordinates":[12.565,55.673]}}}}, + * {"geo_shape":{"location":{"relation":"DISJOINT","shape": + * {"type":"Circle","radius":"10000.000000000002m","coordinates":[12.565,55.673]}}}} + * ],"boost":1.0}}],"boost":1.0}}][_doc{f}#4875], limit[], sort[] estimatedRowSize[87] * */ public void testPushTopNDistanceAndNonPushableEvalWithCompoundFilterToSource() { @@ -6191,9 +6210,9 @@ public void testPushTopNDistanceAndNonPushableEvalWithCompoundFilterToSource() { var exchange = asRemoteExchange(topN.child()); project = as(exchange.child(), ProjectExec.class); - assertThat(names(project.projections()), contains("abbrev", "name", "location", "country", "city", "scalerank", "distance")); + assertThat(names(project.projections()), contains("abbrev", "city", "country", "location", "name", "scalerank", "distance")); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city")); + assertThat(names(extract.attributesToExtract()), contains("abbrev", "city", "country", "name")); var topNChild = as(extract.child(), TopNExec.class); extract = as(topNChild.child(), FieldExtractExec.class); assertThat(names(extract.attributesToExtract()), contains("scalerank")); @@ -6228,27 +6247,25 @@ public void testPushTopNDistanceAndNonPushableEvalWithCompoundFilterToSource() { /** * This test shows that if the filter contains a predicate on the same field that is sorted, we cannot push down the sort. * - * ProjectExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scale{r}#10]] - * \_TopNExec[[Order[distance{r}#4,ASC,LAST], Order[scale{r}#10,ASC,LAST]],5[INTEGER],0] - * \_ExchangeExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scale{r}#10, distance{r}#4],false] - * \_ProjectExec[[abbrev{f}#23, name{f}#24, location{f}#27, country{f}#28, city{f}#29, scale{r}#10, distance{r}#4]] - * \_FieldExtractExec[abbrev{f}#23, name{f}#24, country{f}#28, city{f}#29][] - * \_TopNExec[[Order[distance{r}#4,ASC,LAST], Order[scale{r}#10,ASC,LAST]],5[INTEGER],208] - * \_FilterExec[ - * SUBSTRING(position{r}#7,1[INTEGER],5[INTEGER]) == [50 4f 49 4e 54][KEYWORD] - * AND scale{r}#10 > 3[INTEGER] - * ] - * \_EvalExec[[ - * STDISTANCE(location{f}#27,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distance, - * TOSTRING(location{f}#27) AS position, - * 10[INTEGER] - scalerank{f}#25 AS scale - * ]] - * \_FieldExtractExec[location{f}#27, scalerank{f}#25][] - * \_EsQueryExec[airports], indexMode[standard], query[{ - * "bool":{"must":[ - * {"geo_shape":{"location":{"relation":"INTERSECTS","shape":{...}}}}, - * {"geo_shape":{"location":{"relation":"DISJOINT","shape":{...}}}} - * ],"boost":1.0}}][_doc{f}#42], limit[], sort[] estimatedRowSize[91] + *ProjectExec[[abbrev{f}#1447, name{f}#1448, location{f}#1451, country{f}#1452, city{f}#1453, scalerank{r}#1434]] + * \_TopNExec[[Order[distance{r}#1428,ASC,LAST], Order[scalerank{r}#1434,ASC,LAST]],5[INTEGER],233] + * \_ExchangeExec[[abbrev{f}#1447, city{f}#1453, country{f}#1452, location{f}#1451, name{f}#1448, distance{r}#1428, scalerank{r} + * #1434],false] + * \_ProjectExec[[abbrev{f}#1447, city{f}#1453, country{f}#1452, location{f}#1451, name{f}#1448, distance{r}#1428, scalerank{r} + * #1434]] + * 
\_FieldExtractExec[abbrev{f}#1447, city{f}#1453, country{f}#1452, name..]<[],[]> + * \_TopNExec[[Order[distance{r}#1428,ASC,LAST], Order[scalerank{r}#1434,ASC,LAST]],5[INTEGER],303] + * \_FilterExec[SUBSTRING(position{r}#1431,1[INTEGER],5[INTEGER]) == POINT[KEYWORD] AND scalerank{r}#1434 > 3[INTEGER]] + * \_EvalExec[[STDISTANCE(location{f}#1451,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distan + * ce#1428, TOSTRING(location{f}#1451) AS position#1431, 10[INTEGER] - scalerank{f}#1449 AS scalerank#1434]] + * \_FieldExtractExec[location{f}#1451, scalerank{f}#1449]<[],[]> + * \_EsQueryExec[airports], indexMode[standard], query[ + * {"bool":{"must":[ + * {"geo_shape":{"location":{"relation":"INTERSECTS","shape": + * {"type":"Circle","radius":"499999.99999999994m","coordinates":[12.565,55.673]}}}}, + * {"geo_shape":{"location":{"relation":"DISJOINT","shape": + * {"type":"Circle","radius":"10000.000000000002m","coordinates":[12.565,55.673]}}}} + * ],"boost":1.0}}][_doc{f}#1466], limit[], sort[] estimatedRowSize[91] * */ public void testPushTopNDistanceAndNonPushableEvalsWithCompoundFilterToSource() { @@ -6267,9 +6284,9 @@ public void testPushTopNDistanceAndNonPushableEvalsWithCompoundFilterToSource() var exchange = asRemoteExchange(topN.child()); project = as(exchange.child(), ProjectExec.class); - assertThat(names(project.projections()), contains("abbrev", "name", "location", "country", "city", "scalerank", "distance")); + assertThat(names(project.projections()), contains("abbrev", "city", "country", "location", "name", "distance", "scalerank")); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city")); + assertThat(names(extract.attributesToExtract()), contains("abbrev", "city", "country", "name")); var topNChild = as(extract.child(), TopNExec.class); var filter = as(topNChild.child(), FilterExec.class); assertThat(filter.condition(), isA(And.class)); @@ -6344,9 +6361,9 @@ public void testPushTopNDistanceWithCompoundFilterToSourceAndDisjunctiveNonPusha var exchange = asRemoteExchange(topN.child()); project = as(exchange.child(), ProjectExec.class); - assertThat(names(project.projections()), contains("abbrev", "name", "location", "country", "city", "scalerank", "distance")); + assertThat(names(project.projections()), contains("abbrev", "city", "country", "location", "name", "scalerank", "distance")); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city")); + assertThat(names(extract.attributesToExtract()), contains("abbrev", "city", "country", "name")); var topNChild = as(extract.child(), TopNExec.class); var filter = as(topNChild.child(), FilterExec.class); assertThat(filter.condition(), isA(Or.class)); @@ -6373,28 +6390,29 @@ public void testPushTopNDistanceWithCompoundFilterToSourceAndDisjunctiveNonPusha /** * - * ProjectExec[[abbrev{f}#15, name{f}#16, location{f}#19, country{f}#20, city{f}#21]] - * \_TopNExec[[Order[scalerank{f}#17,ASC,LAST], Order[distance{r}#4,ASC,LAST]],15[INTEGER],0] - * \_ExchangeExec[[abbrev{f}#15, name{f}#16, location{f}#19, country{f}#20, city{f}#21, scalerank{f}#17, distance{r}#4],false] - * \_ProjectExec[[abbrev{f}#15, name{f}#16, location{f}#19, country{f}#20, city{f}#21, scalerank{f}#17, distance{r}#4]] - * \_FieldExtractExec[abbrev{f}#15, name{f}#16, country{f}#20, city{f}#21, ..][] - * \_EvalExec[[STDISTANCE(location{f}#19,[1 1 0 0 0 
e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) - * AS distance]] - * \_FieldExtractExec[location{f}#19][] - * \_EsQueryExec[airports], indexMode[standard], query[{ - * "bool":{ - * "filter":[ - * {"esql_single_value":{"field":"scalerank",...,"source":"scalerank lt 6@3:31"}}, - * {"bool":{"must":[ - * {"geo_shape":{"location":{"relation":"INTERSECTS","shape":{...}}}}, - * {"geo_shape":{"location":{"relation":"DISJOINT","shape":{...}}}} - * ],"boost":1.0}} - * ],"boost":1.0 - * } - * }][_doc{f}#32], limit[], sort[[ - * FieldSort[field=scalerank{f}#17, direction=ASC, nulls=LAST], - * GeoDistanceSort[field=location{f}#19, direction=ASC, lat=55.673, lon=12.565] - * ]] estimatedRowSize[37] + * ProjectExec[[abbrev{f}#6090, name{f}#6091, location{f}#6094, country{f}#6095, city{f}#6096]] + * \_TopNExec[[Order[scalerank{f}#6092,ASC,LAST], Order[distance{r}#6079,ASC,LAST]],15[INTEGER],233] + * \_ExchangeExec[[abbrev{f}#6090, city{f}#6096, country{f}#6095, location{f}#6094, name{f}#6091, scalerank{f}#6092, distance{r} + * #6079],false] + * \_ProjectExec[[abbrev{f}#6090, city{f}#6096, country{f}#6095, location{f}#6094, name{f}#6091, scalerank{f}#6092, distance{r} + * #6079]] + * \_FieldExtractExec[abbrev{f}#6090, city{f}#6096, country{f}#6095, name..]<[],[]> + * \_EvalExec[[STDISTANCE(location{f}#6094,[1 1 0 0 0 e1 7a 14 ae 47 21 29 40 a0 1a 2f dd 24 d6 4b 40][GEO_POINT]) AS distan + * ce#6079]] + * \_FieldExtractExec[location{f}#6094]<[],[]> + * \_EsQueryExec[airports], indexMode[standard], query[ + * {"bool":{"filter":[ + * {"esql_single_value":{"field":"scalerank","next":{"range": + * {"scalerank":{"lt":6,"boost":0.0}}},"source":"scalerank < 6@3:31"}}, + * {"bool":{"must":[ + * {"geo_shape": {"location":{"relation":"INTERSECTS","shape": + * {"type":"Circle","radius":"499999.99999999994m","coordinates":[12.565,55.673]}}}}, + * {"geo_shape":{"location":{"relation":"DISJOINT","shape": + * {"type":"Circle","radius":"10000.000000000002m","coordinates":[12.565,55.673]}}}} + * ],"boost":1.0}}],"boost":1.0}} + * ][_doc{f}#6107], limit[15], sort[ + * [FieldSort[field=scalerank{f}#6092, direction=ASC, nulls=LAST], + * GeoDistanceSort[field=location{f}#6094, direction=ASC, lat=55.673, lon=12.565]]] estimatedRowSize[249] * */ public void testPushCompoundTopNDistanceWithCompoundFilterToSource() { @@ -6413,9 +6431,9 @@ public void testPushCompoundTopNDistanceWithCompoundFilterToSource() { var exchange = asRemoteExchange(topN.child()); project = as(exchange.child(), ProjectExec.class); - assertThat(names(project.projections()), contains("abbrev", "name", "location", "country", "city", "scalerank", "distance")); + assertThat(names(project.projections()), contains("abbrev", "city", "country", "location", "name", "scalerank", "distance")); var extract = as(project.child(), FieldExtractExec.class); - assertThat(names(extract.attributesToExtract()), contains("abbrev", "name", "country", "city", "scalerank")); + assertThat(names(extract.attributesToExtract()), contains("abbrev", "city", "country", "name", "scalerank")); var evalExec = as(extract.child(), EvalExec.class); var alias = as(evalExec.fields().get(0), Alias.class); assertThat(alias.name(), is("distance")); @@ -8053,7 +8071,7 @@ public void testNotEqualsPushdownToDelegate() { * ges{f}#5, last_name{f}#6, long_noidx{f}#12, salary{f}#7],false] * \_ProjectExec[[_meta_field{f}#8, emp_no{f}#2, first_name{f}#3, gender{f}#4, hire_date{f}#9, job{f}#10, job.raw{f}#11, langua * ges{f}#5, last_name{f}#6, long_noidx{f}#12, salary{f}#7]] - * 
\_FieldExtractExec[_meta_field{f}#8, emp_no{f}#2, first_name{f}#3, gen..]<[],[]>
 * \_EsQueryExec[test], indexMode[standard],
 * query[{"bool":{"filter":[{"sampling":{"probability":0.1,"seed":234,"hash":0}}],"boost":1.0}}]
 * [_doc{f}#24], limit[1000], sort[] estimatedRowSize[332]

From e3cf9bbd2cd17fa72acdc23cb42007d22929c541 Mon Sep 17 00:00:00 2001
From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com>
Date: Thu, 17 Jul 2025 10:24:30 +0200
Subject: [PATCH 4/7] ES|QL categorize options (#131104)

* ES|QL categorize options

* refactor options

* fix serialization

* polish

* add verifications

* better test coverage + polish code

* better test coverage + polish code

---
 .../compute/operator/AggregatorBenchmark.java |   2 +-
 .../functionNamedParams/categorize.md         |  13 ++
 .../_snippets/functions/layout/categorize.md  |   3 +
 .../functions/parameters/categorize.md        |   3 +
 .../_snippets/functions/types/categorize.md   |   8 +-
 .../esql/images/functions/categorize.svg      |   2 +-
 .../org/elasticsearch/TransportVersions.java  |   1 +
 .../aggregation/blockhash/BlockHash.java      |  30 +++--
 .../blockhash/CategorizeBlockHash.java        |  39 ++++--
 .../CategorizePackedValuesBlockHash.java      |  10 +-
 .../blockhash/CategorizeBlockHashTests.java   | 110 ++++++++++------
 .../CategorizePackedValuesBlockHashTests.java |  19 ++-
 .../blockhash/TopNBlockHashTests.java         |   2 +-
 .../HashAggregationOperatorTests.java         |   4 +-
 .../src/main/resources/categorize.csv-spec    |  81 +++++++++++-
 .../xpack/esql/action/EsqlCapabilities.java   |  11 +-
 .../esql/expression/function/Options.java     | 107 ++++++++++++++++
 .../function/fulltext/FullTextFunction.java   |  71 -----------
 .../expression/function/fulltext/Match.java   |  12 +-
 .../function/fulltext/MatchPhrase.java        |  10 +-
 .../function/fulltext/MultiMatch.java         |   3 +-
 .../function/fulltext/QueryString.java        |  10 +-
 .../function/grouping/Categorize.java         | 119 ++++++++++++++++--
 .../esql/expression/function/vector/Knn.java  |  38 +-----
 ...laceAggregateNestedExpressionWithEval.java |   7 +-
 .../AbstractPhysicalOperationProviders.java   |   8 +-
 .../xpack/esql/analysis/VerifierTests.java    |  51 ++++++++
 .../grouping/CategorizeErrorTests.java        |   2 +-
 .../function/grouping/CategorizeTests.java    |   2 +-
 .../rules/logical/FoldNullTests.java          |   2 +-
 .../SerializableTokenListCategory.java        |   7 ++
 31 files changed, 572 insertions(+), 215 deletions(-)
 create mode 100644 docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/categorize.md
 create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/Options.java

diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/AggregatorBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/AggregatorBenchmark.java
index d144d7601349d..d5fe1b4a697e0 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/AggregatorBenchmark.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/AggregatorBenchmark.java
@@ -191,7 +191,7 @@ private static Operator operator(DriverContext driverContext, String grouping, S
             new BlockHash.GroupSpec(2, ElementType.BYTES_REF)
         );
         case TOP_N_LONGS -> List.of(
-            new BlockHash.GroupSpec(0, ElementType.LONG, false, new BlockHash.TopNDef(0, true, true, TOP_N_LIMIT))
+            new BlockHash.GroupSpec(0, ElementType.LONG, null, new BlockHash.TopNDef(0, true, true, TOP_N_LIMIT))
         );
         default -> throw new IllegalArgumentException("unsupported 
grouping [" + grouping + "]"); }; diff --git a/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/categorize.md b/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/categorize.md new file mode 100644 index 0000000000000..acd2064002b44 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/categorize.md @@ -0,0 +1,13 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Supported function named parameters** + +`output_format` +: (boolean) The output format of the categories. Defaults to regex. + +`similarity_threshold` +: (boolean) The minimum percentage of token weight that must match for text to be added to the category bucket. Must be between 1 and 100. The larger the value the narrower the categories. Larger values will increase memory usage and create narrower categories. Defaults to 70. + +`analyzer` +: (keyword) Analyzer used to convert the field into tokens for text categorization. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/categorize.md b/docs/reference/query-languages/esql/_snippets/functions/layout/categorize.md index ca23c1e2efc23..2e331187665f4 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/layout/categorize.md +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/categorize.md @@ -19,5 +19,8 @@ :::{include} ../types/categorize.md ::: +:::{include} ../functionNamedParams/categorize.md +::: + :::{include} ../examples/categorize.md ::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/categorize.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/categorize.md index 8733908754570..c013b67375a3d 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/parameters/categorize.md +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/categorize.md @@ -5,3 +5,6 @@ `field` : Expression to categorize +`options` +: (Optional) Categorize additional options as [function named parameters](/reference/query-languages/esql/esql-syntax.md#esql-function-named-params). 
+ diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/categorize.md b/docs/reference/query-languages/esql/_snippets/functions/types/categorize.md index 6043fbe719ff8..8ebe22b61286c 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/types/categorize.md +++ b/docs/reference/query-languages/esql/_snippets/functions/types/categorize.md @@ -2,8 +2,8 @@ **Supported types** -| field | result | -| --- | --- | -| keyword | keyword | -| text | keyword | +| field | options | result | +| --- | --- | --- | +| keyword | | keyword | +| text | | keyword | diff --git a/docs/reference/query-languages/esql/images/functions/categorize.svg b/docs/reference/query-languages/esql/images/functions/categorize.svg index bbb2bda7c480b..7629b9bb978ba 100644 --- a/docs/reference/query-languages/esql/images/functions/categorize.svg +++ b/docs/reference/query-languages/esql/images/functions/categorize.svg @@ -1 +1 @@ -CATEGORIZE(field) \ No newline at end of file +CATEGORIZE(field,options) \ No newline at end of file diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index ae0ccecf15ed7..99c255acf7268 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -340,6 +340,7 @@ static TransportVersion def(int id) { public static final TransportVersion ESQL_FIXED_INDEX_LIKE = def(9_119_0_00); public static final TransportVersion LOOKUP_JOIN_CCS = def(9_120_0_00); public static final TransportVersion NODE_USAGE_STATS_FOR_THREAD_POOLS_IN_CLUSTER_INFO = def(9_121_0_00); + public static final TransportVersion ESQL_CATEGORIZE_OPTIONS = def(9_122_0_00); /* * STOP! READ THIS FIRST! No, really, diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java index 1cae296f09c02..63f4d9c96bcd0 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java @@ -128,16 +128,26 @@ public abstract class BlockHash implements Releasable, SeenGroupIds { public record TopNDef(int order, boolean asc, boolean nullsFirst, int limit) {} /** - * @param isCategorize Whether this group is a CATEGORIZE() or not. - * May be changed in the future when more stateful grouping functions are added. + * Configuration for a BlockHash group spec that is doing text categorization. 
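+ * For example, {@code new CategorizeDef(null, OutputFormat.REGEX, 70)} (an
+ * illustrative instance, not part of this patch) matches the previous
+ * hard-coded behaviour: the default categorization analyzer, regex category
+ * keys, and a 70% similarity threshold.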
*/ - public record GroupSpec(int channel, ElementType elementType, boolean isCategorize, @Nullable TopNDef topNDef) { + public record CategorizeDef(String analyzer, OutputFormat outputFormat, int similarityThreshold) { + public enum OutputFormat { + REGEX, + TOKENS + } + } + + public record GroupSpec(int channel, ElementType elementType, @Nullable CategorizeDef categorizeDef, @Nullable TopNDef topNDef) { public GroupSpec(int channel, ElementType elementType) { - this(channel, elementType, false, null); + this(channel, elementType, null, null); + } + + public GroupSpec(int channel, ElementType elementType, CategorizeDef categorizeDef) { + this(channel, elementType, categorizeDef, null); } - public GroupSpec(int channel, ElementType elementType, boolean isCategorize) { - this(channel, elementType, isCategorize, null); + public boolean isCategorize() { + return categorizeDef != null; } } @@ -207,7 +217,13 @@ public static BlockHash buildCategorizeBlockHash( int emitBatchSize ) { if (groups.size() == 1) { - return new CategorizeBlockHash(blockFactory, groups.get(0).channel, aggregatorMode, analysisRegistry); + return new CategorizeBlockHash( + blockFactory, + groups.get(0).channel, + aggregatorMode, + groups.get(0).categorizeDef, + analysisRegistry + ); } else { assert groups.get(0).isCategorize(); assert groups.subList(1, groups.size()).stream().noneMatch(GroupSpec::isCategorize); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHash.java index 5e716d8c9d5ff..fcc1a7f3d271e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHash.java @@ -18,7 +18,6 @@ import org.elasticsearch.common.util.BytesRefHash; import org.elasticsearch.compute.aggregation.AggregatorMode; import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; -import org.elasticsearch.compute.aggregation.SeenGroupIds; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.BytesRefBlock; @@ -47,12 +46,13 @@ */ public class CategorizeBlockHash extends BlockHash { - private static final CategorizationAnalyzerConfig ANALYZER_CONFIG = CategorizationAnalyzerConfig + private static final CategorizationAnalyzerConfig DEFAULT_ANALYZER_CONFIG = CategorizationAnalyzerConfig .buildStandardEsqlCategorizationAnalyzer(); private static final int NULL_ORD = 0; private final int channel; private final AggregatorMode aggregatorMode; + private final CategorizeDef categorizeDef; private final TokenListCategorizer.CloseableTokenListCategorizer categorizer; private final CategorizeEvaluator evaluator; @@ -64,28 +64,38 @@ public class CategorizeBlockHash extends BlockHash { */ private boolean seenNull = false; - CategorizeBlockHash(BlockFactory blockFactory, int channel, AggregatorMode aggregatorMode, AnalysisRegistry analysisRegistry) { + CategorizeBlockHash( + BlockFactory blockFactory, + int channel, + AggregatorMode aggregatorMode, + CategorizeDef categorizeDef, + AnalysisRegistry analysisRegistry + ) { super(blockFactory); this.channel = channel; this.aggregatorMode = aggregatorMode; + this.categorizeDef = categorizeDef; this.categorizer = new TokenListCategorizer.CloseableTokenListCategorizer( 
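            // Arguments: a bytes-ref hash the categorizer uses to store category tokens,
            // the shared part-of-speech dictionary, and the similarity threshold, rescaled
            // from the 1-100 option value to the 0-1 fraction the categorizer expects.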
new CategorizationBytesRefHash(new BytesRefHash(2048, blockFactory.bigArrays())), CategorizationPartOfSpeechDictionary.getInstance(), - 0.70f + categorizeDef.similarityThreshold() / 100.0f ); if (aggregatorMode.isInputPartial() == false) { - CategorizationAnalyzer analyzer; + CategorizationAnalyzer categorizationAnalyzer; try { Objects.requireNonNull(analysisRegistry); - analyzer = new CategorizationAnalyzer(analysisRegistry, ANALYZER_CONFIG); - } catch (Exception e) { + CategorizationAnalyzerConfig config = categorizeDef.analyzer() == null + ? DEFAULT_ANALYZER_CONFIG + : new CategorizationAnalyzerConfig.Builder().setAnalyzer(categorizeDef.analyzer()).build(); + categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, config); + } catch (IOException e) { categorizer.close(); throw new RuntimeException(e); } - this.evaluator = new CategorizeEvaluator(analyzer); + this.evaluator = new CategorizeEvaluator(categorizationAnalyzer); } else { this.evaluator = null; } @@ -114,7 +124,7 @@ public IntVector nonEmpty() { @Override public BitArray seenGroupIds(BigArrays bigArrays) { - return new SeenGroupIds.Range(seenNull ? 0 : 1, Math.toIntExact(categorizer.getCategoryCount() + 1)).seenGroupIds(bigArrays); + return new Range(seenNull ? 0 : 1, Math.toIntExact(categorizer.getCategoryCount() + 1)).seenGroupIds(bigArrays); } @Override @@ -222,7 +232,7 @@ private Block buildFinalBlock() { try (BytesRefBlock.Builder result = blockFactory.newBytesRefBlockBuilder(categorizer.getCategoryCount())) { result.appendNull(); for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { - scratch.copyChars(category.getRegex()); + scratch.copyChars(getKeyString(category)); result.appendBytesRef(scratch.get()); scratch.clear(); } @@ -232,7 +242,7 @@ private Block buildFinalBlock() { try (BytesRefVector.Builder result = blockFactory.newBytesRefVectorBuilder(categorizer.getCategoryCount())) { for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { - scratch.copyChars(category.getRegex()); + scratch.copyChars(getKeyString(category)); result.appendBytesRef(scratch.get()); scratch.clear(); } @@ -240,6 +250,13 @@ private Block buildFinalBlock() { } } + private String getKeyString(SerializableTokenListCategory category) { + return switch (categorizeDef.outputFormat()) { + case REGEX -> category.getRegex(); + case TOKENS -> category.getKeyTokensString(); + }; + } + /** * Similar implementation to an Evaluator. 
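 * (Roughly: it feeds each input value through the categorization analyzer and
 * returns the ordinal of the matching category from the TokenListCategorizer.)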
*/ diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHash.java index 20874cb10ceb8..bb5f0dee8ca2d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHash.java @@ -56,6 +56,8 @@ public class CategorizePackedValuesBlockHash extends BlockHash { int emitBatchSize ) { super(blockFactory); + assert specs.get(0).categorizeDef() != null; + this.specs = specs; this.aggregatorMode = aggregatorMode; blocks = new Block[specs.size()]; @@ -68,7 +70,13 @@ public class CategorizePackedValuesBlockHash extends BlockHash { boolean success = false; try { - categorizeBlockHash = new CategorizeBlockHash(blockFactory, specs.get(0).channel(), aggregatorMode, analysisRegistry); + categorizeBlockHash = new CategorizeBlockHash( + blockFactory, + specs.get(0).channel(), + aggregatorMode, + specs.get(0).categorizeDef(), + analysisRegistry + ); packedValuesBlockHash = new PackedValuesBlockHash(delegateSpecs, blockFactory, emitBatchSize); success = true; } finally { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java index 842952f9ef8bd..9ce086307acee 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java @@ -76,7 +76,13 @@ private void initAnalysisRegistry() throws IOException { ).getAnalysisRegistry(); } + private BlockHash.CategorizeDef getCategorizeDef() { + return new BlockHash.CategorizeDef(null, randomFrom(BlockHash.CategorizeDef.OutputFormat.values()), 70); + } + public void testCategorizeRaw() { + BlockHash.CategorizeDef categorizeDef = getCategorizeDef(); + final Page page; boolean withNull = randomBoolean(); final int positions = 7 + (withNull ? 
1 : 0); @@ -98,7 +104,7 @@ public void testCategorizeRaw() { page = new Page(builder.build()); } - try (var hash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.SINGLE, analysisRegistry)) { + try (var hash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.SINGLE, categorizeDef, analysisRegistry)) { for (int i = randomInt(2); i < 3; i++) { hash.add(page, new GroupingAggregatorFunction.AddInput() { private void addBlock(int positionOffset, IntBlock groupIds) { @@ -137,7 +143,10 @@ public void close() { } }); - assertHashState(hash, withNull, ".*?Connected.+?to.*?", ".*?Connection.+?error.*?", ".*?Disconnected.*?"); + switch (categorizeDef.outputFormat()) { + case REGEX -> assertHashState(hash, withNull, ".*?Connected.+?to.*?", ".*?Connection.+?error.*?", ".*?Disconnected.*?"); + case TOKENS -> assertHashState(hash, withNull, "Connected to", "Connection error", "Disconnected"); + } } } finally { page.releaseBlocks(); @@ -145,6 +154,8 @@ public void close() { } public void testCategorizeRawMultivalue() { + BlockHash.CategorizeDef categorizeDef = getCategorizeDef(); + final Page page; boolean withNull = randomBoolean(); final int positions = 3 + (withNull ? 1 : 0); @@ -170,7 +181,7 @@ public void testCategorizeRawMultivalue() { page = new Page(builder.build()); } - try (var hash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.SINGLE, analysisRegistry)) { + try (var hash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.SINGLE, categorizeDef, analysisRegistry)) { for (int i = randomInt(2); i < 3; i++) { hash.add(page, new GroupingAggregatorFunction.AddInput() { private void addBlock(int positionOffset, IntBlock groupIds) { @@ -216,7 +227,10 @@ public void close() { } }); - assertHashState(hash, withNull, ".*?Connected.+?to.*?", ".*?Connection.+?error.*?", ".*?Disconnected.*?"); + switch (categorizeDef.outputFormat()) { + case REGEX -> assertHashState(hash, withNull, ".*?Connected.+?to.*?", ".*?Connection.+?error.*?", ".*?Disconnected.*?"); + case TOKENS -> assertHashState(hash, withNull, "Connected to", "Connection error", "Disconnected"); + } } } finally { page.releaseBlocks(); @@ -224,6 +238,8 @@ public void close() { } public void testCategorizeIntermediate() { + BlockHash.CategorizeDef categorizeDef = getCategorizeDef(); + Page page1; boolean withNull = randomBoolean(); int positions1 = 7 + (withNull ? 
1 : 0); @@ -259,8 +275,8 @@ public void testCategorizeIntermediate() { // Fill intermediatePages with the intermediate state from the raw hashes try ( - BlockHash rawHash1 = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INITIAL, analysisRegistry); - BlockHash rawHash2 = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INITIAL, analysisRegistry); + BlockHash rawHash1 = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INITIAL, categorizeDef, analysisRegistry); + BlockHash rawHash2 = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INITIAL, categorizeDef, analysisRegistry); ) { rawHash1.add(page1, new GroupingAggregatorFunction.AddInput() { private void addBlock(int positionOffset, IntBlock groupIds) { @@ -335,7 +351,7 @@ public void close() { page2.releaseBlocks(); } - try (var intermediateHash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.FINAL, null)) { + try (var intermediateHash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.FINAL, categorizeDef, null)) { intermediateHash.add(intermediatePage1, new GroupingAggregatorFunction.AddInput() { private void addBlock(int positionOffset, IntBlock groupIds) { List values = IntStream.range(0, groupIds.getPositionCount()) @@ -403,14 +419,24 @@ public void close() { } }); - assertHashState( - intermediateHash, - withNull, - ".*?Connected.+?to.*?", - ".*?Connection.+?error.*?", - ".*?Disconnected.*?", - ".*?System.+?shutdown.*?" - ); + switch (categorizeDef.outputFormat()) { + case REGEX -> assertHashState( + intermediateHash, + withNull, + ".*?Connected.+?to.*?", + ".*?Connection.+?error.*?", + ".*?Disconnected.*?", + ".*?System.+?shutdown.*?" + ); + case TOKENS -> assertHashState( + intermediateHash, + withNull, + "Connected to", + "Connection error", + "Disconnected", + "System shutdown" + ); + } } } finally { intermediatePage1.releaseBlocks(); @@ -419,6 +445,9 @@ public void close() { } public void testCategorize_withDriver() { + BlockHash.CategorizeDef categorizeDef = getCategorizeDef(); + BlockHash.GroupSpec groupSpec = new BlockHash.GroupSpec(0, ElementType.BYTES_REF, categorizeDef); + BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, ByteSizeValue.ofMb(256)).withCircuitBreaking(); CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST); DriverContext driverContext = new DriverContext(bigArrays, new BlockFactory(breaker, bigArrays)); @@ -477,7 +506,7 @@ public void testCategorize_withDriver() { new LocalSourceOperator(input1), List.of( new HashAggregationOperator.HashAggregationOperatorFactory( - List.of(makeGroupSpec()), + List.of(groupSpec), AggregatorMode.INITIAL, List.of( new SumLongAggregatorFunctionSupplier().groupingAggregatorFactory(AggregatorMode.INITIAL, List.of(1)), @@ -496,7 +525,7 @@ public void testCategorize_withDriver() { new LocalSourceOperator(input2), List.of( new HashAggregationOperator.HashAggregationOperatorFactory( - List.of(makeGroupSpec()), + List.of(groupSpec), AggregatorMode.INITIAL, List.of( new SumLongAggregatorFunctionSupplier().groupingAggregatorFactory(AggregatorMode.INITIAL, List.of(1)), @@ -517,7 +546,7 @@ public void testCategorize_withDriver() { new CannedSourceOperator(intermediateOutput.iterator()), List.of( new HashAggregationOperator.HashAggregationOperatorFactory( - List.of(makeGroupSpec()), + List.of(groupSpec), AggregatorMode.FINAL, List.of( new SumLongAggregatorFunctionSupplier().groupingAggregatorFactory(AggregatorMode.FINAL, List.of(1, 2)), @@ -544,23 +573,36 @@ public void 
testCategorize_withDriver() { sums.put(outputTexts.getBytesRef(i, new BytesRef()).utf8ToString(), outputSums.getLong(i)); maxs.put(outputTexts.getBytesRef(i, new BytesRef()).utf8ToString(), outputMaxs.getLong(i)); } + List keys = switch (categorizeDef.outputFormat()) { + case REGEX -> List.of( + ".*?aaazz.*?", + ".*?bbbzz.*?", + ".*?ccczz.*?", + ".*?dddzz.*?", + ".*?eeezz.*?", + ".*?words.+?words.+?words.+?goodbye.*?", + ".*?words.+?words.+?words.+?hello.*?" + ); + case TOKENS -> List.of("aaazz", "bbbzz", "ccczz", "dddzz", "eeezz", "words words words goodbye", "words words words hello"); + }; + assertThat( sums, equalTo( Map.of( - ".*?aaazz.*?", + keys.get(0), 1L, - ".*?bbbzz.*?", + keys.get(1), 2L, - ".*?ccczz.*?", + keys.get(2), 33L, - ".*?dddzz.*?", + keys.get(3), 44L, - ".*?eeezz.*?", + keys.get(4), 5L, - ".*?words.+?words.+?words.+?goodbye.*?", + keys.get(5), 8888L, - ".*?words.+?words.+?words.+?hello.*?", + keys.get(6), 999L ) ) @@ -569,19 +611,19 @@ public void testCategorize_withDriver() { maxs, equalTo( Map.of( - ".*?aaazz.*?", + keys.get(0), 1L, - ".*?bbbzz.*?", + keys.get(1), 2L, - ".*?ccczz.*?", + keys.get(2), 30L, - ".*?dddzz.*?", + keys.get(3), 40L, - ".*?eeezz.*?", + keys.get(4), 5L, - ".*?words.+?words.+?words.+?goodbye.*?", + keys.get(5), 8000L, - ".*?words.+?words.+?words.+?hello.*?", + keys.get(6), 900L ) ) @@ -589,10 +631,6 @@ public void testCategorize_withDriver() { Releasables.close(() -> Iterators.map(finalOutput.iterator(), (Page p) -> p::releaseBlocks)); } - private BlockHash.GroupSpec makeGroupSpec() { - return new BlockHash.GroupSpec(0, ElementType.BYTES_REF, true); - } - private void assertHashState(CategorizeBlockHash hash, boolean withNull, String... expectedKeys) { // Check the keys Block[] blocks = null; diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHashTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHashTests.java index 734b0660d24a3..d0eb89eafd841 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHashTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHashTests.java @@ -74,10 +74,15 @@ public void testCategorize_withDriver() { DriverContext driverContext = new DriverContext(bigArrays, new BlockFactory(breaker, bigArrays)); boolean withNull = randomBoolean(); boolean withMultivalues = randomBoolean(); + BlockHash.CategorizeDef categorizeDef = new BlockHash.CategorizeDef( + null, + randomFrom(BlockHash.CategorizeDef.OutputFormat.values()), + 70 + ); List groupSpecs = List.of( - new BlockHash.GroupSpec(0, ElementType.BYTES_REF, true), - new BlockHash.GroupSpec(1, ElementType.INT, false) + new BlockHash.GroupSpec(0, ElementType.BYTES_REF, categorizeDef), + new BlockHash.GroupSpec(1, ElementType.INT, null) ); LocalSourceOperator.BlockSupplier input1 = () -> { @@ -218,8 +223,12 @@ public void testCategorize_withDriver() { } Releasables.close(() -> Iterators.map(finalOutput.iterator(), (Page p) -> p::releaseBlocks)); + List keys = switch (categorizeDef.outputFormat()) { + case REGEX -> List.of(".*?connected.+?to.*?", ".*?connection.+?error.*?", ".*?disconnected.*?"); + case TOKENS -> List.of("connected to", "connection error", "disconnected"); + }; Map>> expectedResult = Map.of( - ".*?connected.+?to.*?", + keys.get(0), Map.of( 7, 
Set.of("connected to 1.1.1", "connected to 1.1.2", "connected to 1.1.4", "connected to 2.1.2"), @@ -228,9 +237,9 @@ public void testCategorize_withDriver() { 111, Set.of("connected to 2.1.1") ), - ".*?connection.+?error.*?", + keys.get(1), Map.of(7, Set.of("connection error"), 42, Set.of("connection error")), - ".*?disconnected.*?", + keys.get(2), Map.of(7, Set.of("disconnected")) ); if (withNull) { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/TopNBlockHashTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/TopNBlockHashTests.java index f96b9d26f075c..0ebfa7e72b805 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/TopNBlockHashTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/TopNBlockHashTests.java @@ -363,7 +363,7 @@ private void hashBatchesCallbackOnLast(Consumer callback, Block[].. private BlockHash buildBlockHash(int emitBatchSize, Block... values) { List specs = new ArrayList<>(values.length); for (int c = 0; c < values.length; c++) { - specs.add(new BlockHash.GroupSpec(c, values[c].elementType(), false, topNDef(c))); + specs.add(new BlockHash.GroupSpec(c, values[c].elementType(), null, topNDef(c))); } assert forcePackedHash == false : "Packed TopN hash not implemented yet"; /*return forcePackedHash diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java index 106b9613d7bb2..0e9c0e33d22cd 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java @@ -113,7 +113,7 @@ public void testTopNNullsLast() { try ( var operator = new HashAggregationOperator.HashAggregationOperatorFactory( - List.of(new BlockHash.GroupSpec(groupChannel, ElementType.LONG, false, new BlockHash.TopNDef(0, ascOrder, false, 3))), + List.of(new BlockHash.GroupSpec(groupChannel, ElementType.LONG, null, new BlockHash.TopNDef(0, ascOrder, false, 3))), mode, List.of( new SumLongAggregatorFunctionSupplier().groupingAggregatorFactory(mode, aggregatorChannels), @@ -190,7 +190,7 @@ public void testTopNNullsFirst() { try ( var operator = new HashAggregationOperator.HashAggregationOperatorFactory( - List.of(new BlockHash.GroupSpec(groupChannel, ElementType.LONG, false, new BlockHash.TopNDef(0, ascOrder, true, 3))), + List.of(new BlockHash.GroupSpec(groupChannel, ElementType.LONG, null, new BlockHash.TopNDef(0, ascOrder, true, 3))), mode, List.of( new SumLongAggregatorFunctionSupplier().groupingAggregatorFactory(mode, aggregatorChannels), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec index 7168ca3dc398f..be46e68a8b08a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec @@ -397,7 +397,7 @@ FROM sample_data ; COUNT():long | SUM(event_duration):long | category:keyword - 7 | 23231327 | null + 7 | 23231327 | null ; on null row @@ -800,3 +800,82 @@ COUNT():long | VALUES(str):keyword | category:keyword | str:keyword 1 | [a, b, c] | null | b 1 | 
[a, b, c] | null | c ; + +with option output_format regex +required_capability: categorize_options + +FROM sample_data + | STATS count=COUNT() + BY category=CATEGORIZE(message, {"output_format": "regex"}) + | SORT count DESC, category +; + +count:long | category:keyword + 3 | .*?Connected.+?to.*? + 3 | .*?Connection.+?error.*? + 1 | .*?Disconnected.*? +; + +with option output_format tokens +required_capability: categorize_options + +FROM sample_data + | STATS count=COUNT() + BY category=CATEGORIZE(message, {"output_format": "tokens"}) + | SORT count DESC, category +; + +count:long | category:keyword + 3 | Connected to + 3 | Connection error + 1 | Disconnected +; + +with option similarity_threshold +required_capability: categorize_options + +FROM sample_data + | STATS count=COUNT() + BY category=CATEGORIZE(message, {"similarity_threshold": 99}) + | SORT count DESC, category +; + +count:long | category:keyword +3 | .*?Connection.+?error.*? +1 | .*?Connected.+?to.+?10\.1\.0\.1.*? +1 | .*?Connected.+?to.+?10\.1\.0\.2.*? +1 | .*?Connected.+?to.+?10\.1\.0\.3.*? +1 | .*?Disconnected.*? +; + +with option analyzer +required_capability: categorize_options + +FROM sample_data + | STATS count=COUNT() + BY category=CATEGORIZE(message, {"analyzer": "stop"}) + | SORT count DESC, category +; + +count:long | category:keyword +3 | .*?connected.*? +3 | .*?connection.+?error.*? +1 | .*?disconnected.*? +; + +with all options +required_capability: categorize_options + +FROM sample_data + | STATS count=COUNT() + BY category=CATEGORIZE(message, {"analyzer": "whitespace", "similarity_threshold": 100, "output_format": "tokens"}) + | SORT count DESC, category +; + +count:long | category:keyword +3 | Connection error +1 | Connected to 10.1.0.1 +1 | Connected to 10.1.0.2 +1 | Connected to 10.1.0.3 +1 | Disconnected +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index f4f6bd8a12d79..41a92214406bb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1254,10 +1254,12 @@ public enum Cap { * FUSE command */ FUSE(Build.current().isSnapshot()), + /** * Support improved behavior for LIKE operator when used with index fields. */ LIKE_ON_INDEX_FIELDS, + /** * Support avg with aggregate metric doubles */ @@ -1274,10 +1276,15 @@ public enum Cap { */ FAIL_IF_ALL_SHARDS_FAIL(Build.current().isSnapshot()), - /* + /** * Cosine vector similarity function */ - COSINE_VECTOR_SIMILARITY_FUNCTION(Build.current().isSnapshot()); + COSINE_VECTOR_SIMILARITY_FUNCTION(Build.current().isSnapshot()), + + /** + * Support for the options field of CATEGORIZE. + */ + CATEGORIZE_OPTIONS; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/Options.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/Options.java new file mode 100644 index 0000000000000..891d8f1e6c264 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/Options.java @@ -0,0 +1,107 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; +import org.elasticsearch.xpack.esql.core.expression.EntryExpression; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.MapExpression; +import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.DataTypeConverter; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.Consumer; + +import static org.elasticsearch.common.logging.LoggerMessageFormat.format; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isMapExpression; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; + +public class Options { + + public static Expression.TypeResolution resolve( + Expression options, + Source source, + TypeResolutions.ParamOrdinal paramOrdinal, + Map allowedOptions + ) { + return resolve(options, source, paramOrdinal, allowedOptions, null); + } + + public static Expression.TypeResolution resolve( + Expression options, + Source source, + TypeResolutions.ParamOrdinal paramOrdinal, + Map allowedOptions, + Consumer> verifyOptions + ) { + if (options != null) { + Expression.TypeResolution resolution = isNotNull(options, source.text(), paramOrdinal); + if (resolution.unresolved()) { + return resolution; + } + // MapExpression does not have a DataType associated with it + resolution = isMapExpression(options, source.text(), paramOrdinal); + if (resolution.unresolved()) { + return resolution; + } + try { + Map optionsMap = new HashMap<>(); + populateMap((MapExpression) options, optionsMap, source, paramOrdinal, allowedOptions); + if (verifyOptions != null) { + verifyOptions.accept(optionsMap); + } + } catch (InvalidArgumentException e) { + return new Expression.TypeResolution(e.getMessage()); + } + } + return Expression.TypeResolution.TYPE_RESOLVED; + } + + public static void populateMap( + final MapExpression options, + final Map optionsMap, + final Source source, + final TypeResolutions.ParamOrdinal paramOrdinal, + final Map allowedOptions + ) throws InvalidArgumentException { + for (EntryExpression entry : options.entryExpressions()) { + Expression optionExpr = entry.key(); + Expression valueExpr = entry.value(); + Expression.TypeResolution resolution = isFoldable(optionExpr, source.text(), paramOrdinal).and( + isFoldable(valueExpr, source.text(), paramOrdinal) + ); + if (resolution.unresolved()) { + throw new InvalidArgumentException(resolution.message()); + } + Object optionExprLiteral = ((Literal) optionExpr).value(); + Object valueExprLiteral = ((Literal) valueExpr).value(); + String optionName = optionExprLiteral instanceof BytesRef br ? br.utf8ToString() : optionExprLiteral.toString(); + String optionValue = valueExprLiteral instanceof BytesRef br ? 
br.utf8ToString() : valueExprLiteral.toString(); + // validate the optionExpr is supported + DataType dataType = allowedOptions.get(optionName); + if (dataType == null) { + throw new InvalidArgumentException( + format(null, "Invalid option [{}] in [{}], expected one of {}", optionName, source.text(), allowedOptions.keySet()) + ); + } + try { + optionsMap.put(optionName, DataTypeConverter.convert(optionValue, dataType)); + } catch (InvalidArgumentException e) { + throw new InvalidArgumentException( + format(null, "Invalid option [{}] in [{}], {}", optionName, source.text(), e.getMessage()) + ); + } + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index c347340c25050..b5378db783f46 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.expression.function.fulltext; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.compute.lucene.LuceneQueryEvaluator.ShardConfig; import org.elasticsearch.compute.lucene.LuceneQueryExpressionEvaluator; @@ -20,20 +19,15 @@ import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.common.Failures; -import org.elasticsearch.xpack.esql.core.InvalidArgumentException; -import org.elasticsearch.xpack.esql.core.expression.EntryExpression; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.FoldContext; -import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.MapExpression; import org.elasticsearch.xpack.esql.core.expression.Nullability; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; import org.elasticsearch.xpack.esql.core.expression.function.Function; import org.elasticsearch.xpack.esql.core.querydsl.query.Query; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.DataTypeConverter; import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction; @@ -55,17 +49,12 @@ import java.util.ArrayList; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Objects; import java.util.function.BiConsumer; import java.util.function.Predicate; -import static org.elasticsearch.common.logging.LoggerMessageFormat.format; import static org.elasticsearch.xpack.esql.common.Failure.fail; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isMapExpression; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; import 
static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; @@ -409,66 +398,6 @@ public ScoreOperator.ExpressionScorer.Factory toScorer(ToScorer toScorer) { return new LuceneQueryScoreEvaluator.Factory(shardConfigs); } - protected static void populateOptionsMap( - final MapExpression options, - final Map optionsMap, - final TypeResolutions.ParamOrdinal paramOrdinal, - final String sourceText, - final Map allowedOptions - ) throws InvalidArgumentException { - for (EntryExpression entry : options.entryExpressions()) { - Expression optionExpr = entry.key(); - Expression valueExpr = entry.value(); - TypeResolution resolution = isFoldable(optionExpr, sourceText, paramOrdinal).and( - isFoldable(valueExpr, sourceText, paramOrdinal) - ); - if (resolution.unresolved()) { - throw new InvalidArgumentException(resolution.message()); - } - Object optionExprLiteral = ((Literal) optionExpr).value(); - Object valueExprLiteral = ((Literal) valueExpr).value(); - String optionName = optionExprLiteral instanceof BytesRef br ? br.utf8ToString() : optionExprLiteral.toString(); - String optionValue = valueExprLiteral instanceof BytesRef br ? br.utf8ToString() : valueExprLiteral.toString(); - // validate the optionExpr is supported - DataType dataType = allowedOptions.get(optionName); - if (dataType == null) { - throw new InvalidArgumentException( - format(null, "Invalid option [{}] in [{}], expected one of {}", optionName, sourceText, allowedOptions.keySet()) - ); - } - try { - optionsMap.put(optionName, DataTypeConverter.convert(optionValue, dataType)); - } catch (InvalidArgumentException e) { - throw new InvalidArgumentException(format(null, "Invalid option [{}] in [{}], {}", optionName, sourceText, e.getMessage())); - } - } - } - - protected TypeResolution resolveOptions(Expression options, TypeResolutions.ParamOrdinal paramOrdinal) { - if (options != null) { - TypeResolution resolution = isNotNull(options, sourceText(), paramOrdinal); - if (resolution.unresolved()) { - return resolution; - } - // MapExpression does not have a DataType associated with it - resolution = isMapExpression(options, sourceText(), paramOrdinal); - if (resolution.unresolved()) { - return resolution; - } - - try { - resolvedOptions(); - } catch (InvalidArgumentException e) { - return new TypeResolution(e.getMessage()); - } - } - return TypeResolution.TYPE_RESOLVED; - } - - protected Map resolvedOptions() throws InvalidArgumentException { - return Map.of(); - } - // TODO: this should likely be replaced by calls to FieldAttribute#fieldName; the MultiTypeEsField case looks // wrong if `fieldAttribute` is a subfield, e.g. `parent.child` - multiTypeEsField#getName will just return `child`. 
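    // Note: the option-resolution helpers removed above now live in the shared
    // Options class; Match, MatchPhrase, MultiMatch, QueryString and Knn (later
    // in this patch) call Options.resolve and Options.populateMap instead.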
public static String getNameFromFieldAttribute(FieldAttribute fieldAttribute) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java index 743263a878552..5c5a46fd2f759 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/Match.java @@ -33,6 +33,7 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.MapParam; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; +import org.elasticsearch.xpack.esql.expression.function.Options; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; @@ -298,7 +299,9 @@ public final void writeTo(StreamOutput out) throws IOException { @Override protected TypeResolution resolveParams() { - return resolveField().and(resolveQuery()).and(resolveOptions(options(), THIRD)).and(checkParamCompatibility()); + return resolveField().and(resolveQuery()) + .and(Options.resolve(options(), source(), THIRD, ALLOWED_OPTIONS)) + .and(checkParamCompatibility()); } private TypeResolution resolveField() { @@ -342,11 +345,6 @@ private TypeResolution checkParamCompatibility() { return new TypeResolution(formatIncompatibleTypesMessage(fieldType, queryType, sourceText())); } - @Override - protected Map resolvedOptions() { - return matchQueryOptions(); - } - private Map matchQueryOptions() throws InvalidArgumentException { if (options() == null) { return Map.of(LENIENT_FIELD.getPreferredName(), true); @@ -356,7 +354,7 @@ private Map matchQueryOptions() throws InvalidArgumentException // Match is lenient by default to avoid failing on incompatible types matchOptions.put(LENIENT_FIELD.getPreferredName(), true); - populateOptionsMap((MapExpression) options(), matchOptions, SECOND, sourceText(), ALLOWED_OPTIONS); + Options.populateMap((MapExpression) options(), matchOptions, source(), SECOND, ALLOWED_OPTIONS); return matchOptions; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhrase.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhrase.java index a41a9792f7943..4ed0e16ab5b4a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhrase.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhrase.java @@ -30,6 +30,7 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.MapParam; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; +import org.elasticsearch.xpack.esql.expression.function.Options; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; @@ -187,7 +188,7 @@ public final void writeTo(StreamOutput out) throws IOException { @Override protected TypeResolution resolveParams() { - return 
resolveField().and(resolveQuery()).and(resolveOptions(options(), THIRD)); + return resolveField().and(resolveQuery()).and(Options.resolve(options(), source(), THIRD, ALLOWED_OPTIONS)); } private TypeResolution resolveField() { @@ -200,18 +201,13 @@ private TypeResolution resolveQuery() { ); } - @Override - protected Map resolvedOptions() throws InvalidArgumentException { - return matchPhraseQueryOptions(); - } - private Map matchPhraseQueryOptions() throws InvalidArgumentException { if (options() == null) { return Map.of(); } Map matchPhraseOptions = new HashMap<>(); - populateOptionsMap((MapExpression) options(), matchPhraseOptions, SECOND, sourceText(), ALLOWED_OPTIONS); + Options.populateMap((MapExpression) options(), matchPhraseOptions, source(), SECOND, ALLOWED_OPTIONS); return matchPhraseOptions; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MultiMatch.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MultiMatch.java index 1178178c432fc..3e9fed6be850d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MultiMatch.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MultiMatch.java @@ -29,6 +29,7 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.MapParam; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; +import org.elasticsearch.xpack.esql.expression.function.Options; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; @@ -368,7 +369,7 @@ private Map getOptions() throws InvalidArgumentException { return options; } - Match.populateOptionsMap((MapExpression) options(), options, THIRD, sourceText(), OPTIONS); + Options.populateMap((MapExpression) options(), options, source(), THIRD, OPTIONS); return options; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryString.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryString.java index a4c1b1f12fb56..7285f19fc5aa7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryString.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryString.java @@ -26,6 +26,7 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.MapParam; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; +import org.elasticsearch.xpack.esql.expression.function.Options; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; @@ -321,18 +322,13 @@ private Map queryStringOptions() throws InvalidArgumentException } Map matchOptions = new HashMap<>(); - populateOptionsMap((MapExpression) options(), matchOptions, SECOND, sourceText(), ALLOWED_OPTIONS); + Options.populateMap((MapExpression) options(), matchOptions, source(), SECOND, ALLOWED_OPTIONS); return matchOptions; } - @Override - protected Map resolvedOptions() { - return 
queryStringOptions(); - } - @Override protected TypeResolution resolveParams() { - return resolveQuery().and(resolveOptions(options(), SECOND)); + return resolveQuery().and(Options.resolve(options(), source(), SECOND, ALLOWED_OPTIONS)); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java index 15b4621589457..75918091f9ecd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java @@ -7,13 +7,18 @@ package org.elasticsearch.xpack.esql.expression.function.grouping; +import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.compute.aggregation.blockhash.BlockHash.CategorizeDef; +import org.elasticsearch.compute.aggregation.blockhash.BlockHash.CategorizeDef.OutputFormat; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.xpack.esql.LicenseAware; import org.elasticsearch.xpack.esql.SupportsObservabilityTier; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.MapExpression; import org.elasticsearch.xpack.esql.core.expression.Nullability; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -21,16 +26,29 @@ import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.FunctionType; +import org.elasticsearch.xpack.esql.expression.function.MapParam; +import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; +import org.elasticsearch.xpack.esql.expression.function.Options; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.ml.MachineLearning; import java.io.IOException; +import java.util.HashMap; import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.TreeMap; +import static java.util.Map.entry; +import static org.elasticsearch.common.logging.LoggerMessageFormat.format; +import static org.elasticsearch.compute.aggregation.blockhash.BlockHash.CategorizeDef.OutputFormat.REGEX; import static org.elasticsearch.xpack.esql.SupportsObservabilityTier.ObservabilityTier.COMPLETE; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; +import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; /** * Categorizes text messages. @@ -42,14 +60,23 @@ *
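 * The optional second argument tunes the categorizer: the analyzer used to
 * tokenize the field, the category key format (regex or tokens), and the
 * similarity threshold. Defaults are applied in {@code categorizeDef()} below.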

*/ @SupportsObservabilityTier(tier = COMPLETE) -public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction implements LicenseAware { +public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction implements OptionalArgument, LicenseAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "Categorize", Categorize::new ); + private static final String ANALYZER = "analyzer"; + private static final String OUTPUT_FORMAT = "output_format"; + private static final String SIMILARITY_THRESHOLD = "similarity_threshold"; + + private static final Map ALLOWED_OPTIONS = new TreeMap<>( + Map.ofEntries(entry(ANALYZER, KEYWORD), entry(OUTPUT_FORMAT, KEYWORD), entry(SIMILARITY_THRESHOLD, INTEGER)) + ); + private final Expression field; + private final Expression options; @FunctionInfo( returnType = "keyword", @@ -70,21 +97,56 @@ public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction ) public Categorize( Source source, - @Param(name = "field", type = { "text", "keyword" }, description = "Expression to categorize") Expression field - + @Param(name = "field", type = { "text", "keyword" }, description = "Expression to categorize") Expression field, + @MapParam( + name = "options", + description = "(Optional) Categorize additional options as <>.", + params = { + @MapParam.MapParamEntry( + name = ANALYZER, + type = "keyword", + valueHint = { "standard" }, + description = "Analyzer used to convert the field into tokens for text categorization." + ), + @MapParam.MapParamEntry( + name = OUTPUT_FORMAT, + type = "keyword", + valueHint = { "regex", "tokens" }, + description = "The output format of the categories. Defaults to regex." + ), + @MapParam.MapParamEntry( + name = SIMILARITY_THRESHOLD, + type = "integer", + valueHint = { "70" }, + description = "The minimum percentage of token weight that must match for text to be added to the category bucket. " + + "Must be between 1 and 100. The larger the value the narrower the categories. " + + "Larger values will increase memory usage and create narrower categories. Defaults to 70." + ), }, + optional = true + ) Expression options ) { - super(source, List.of(field)); + super(source, options == null ? List.of(field) : List.of(field, options)); this.field = field; + this.options = options; } private Categorize(StreamInput in) throws IOException { - this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class)); + this( + Source.readFrom((PlanStreamInput) in), + in.readNamedWriteable(Expression.class), + in.getTransportVersion().onOrAfter(TransportVersions.ESQL_CATEGORIZE_OPTIONS) + ? 
in.readOptionalNamedWriteable(Expression.class) + : null + ); } @Override public void writeTo(StreamOutput out) throws IOException { source().writeTo(out); out.writeNamedWriteable(field); + if (out.getTransportVersion().onOrAfter(TransportVersions.ESQL_CATEGORIZE_OPTIONS)) { + out.writeOptionalNamedWriteable(options); + } } @Override @@ -107,7 +169,48 @@ public Nullability nullable() { @Override protected TypeResolution resolveType() { - return isString(field(), sourceText(), DEFAULT); + return isString(field(), sourceText(), DEFAULT).and( + Options.resolve(options, source(), SECOND, ALLOWED_OPTIONS, this::verifyOptions) + ); + } + + private void verifyOptions(Map optionsMap) { + if (options == null) { + return; + } + Integer similarityThreshold = (Integer) optionsMap.get(SIMILARITY_THRESHOLD); + if (similarityThreshold != null) { + if (similarityThreshold <= 0 || similarityThreshold > 100) { + throw new InvalidArgumentException( + format("invalid similarity threshold [{}], expecting a number between 1 and 100, inclusive", similarityThreshold) + ); + } + } + String outputFormat = (String) optionsMap.get(OUTPUT_FORMAT); + if (outputFormat != null) { + try { + OutputFormat.valueOf(outputFormat.toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException e) { + throw new InvalidArgumentException( + format(null, "invalid output format [{}], expecting one of [REGEX, TOKENS]", outputFormat) + ); + } + } + } + + public CategorizeDef categorizeDef() { + Map optionsMap = new HashMap<>(); + if (options != null) { + Options.populateMap((MapExpression) options, optionsMap, source(), SECOND, ALLOWED_OPTIONS); + } + Integer similarityThreshold = (Integer) optionsMap.get(SIMILARITY_THRESHOLD); + String outputFormatString = (String) optionsMap.get(OUTPUT_FORMAT); + OutputFormat outputFormat = outputFormatString == null ? null : OutputFormat.valueOf(outputFormatString.toUpperCase(Locale.ROOT)); + return new CategorizeDef( + (String) optionsMap.get("analyzer"), + outputFormat == null ? REGEX : outputFormat, + similarityThreshold == null ? 70 : similarityThreshold + ); } @Override @@ -117,12 +220,12 @@ public DataType dataType() { @Override public Categorize replaceChildren(List newChildren) { - return new Categorize(source(), newChildren.get(0)); + return new Categorize(source(), newChildren.get(0), newChildren.size() > 1 ? 
@@ -117,12 +220,12 @@ public DataType dataType() {
 
     @Override
     public Categorize replaceChildren(List<Expression> newChildren) {
-        return new Categorize(source(), newChildren.get(0));
+        return new Categorize(source(), newChildren.get(0), newChildren.size() > 1 ? newChildren.get(1) : null);
     }
 
     @Override
     protected NodeInfo<? extends Expression> info() {
-        return NodeInfo.create(this, Categorize::new, field);
+        return NodeInfo.create(this, Categorize::new, field, options);
     }
 
     public Expression field() {
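
A usage sketch of the new optional map from the query side ("logs" and "message" are invented names; the option values shown are just the documented defaults, except for the output format):

    FROM logs
    | STATS COUNT(*) BY CATEGORIZE(message, { "analyzer": "standard", "output_format": "tokens", "similarity_threshold": 70 })
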
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java
index 61528521c3749..cab5ec862d7f5 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java
@@ -30,6 +30,7 @@
 import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
 import org.elasticsearch.xpack.esql.expression.function.MapParam;
 import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
+import org.elasticsearch.xpack.esql.expression.function.Options;
 import org.elasticsearch.xpack.esql.expression.function.Param;
 import org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction;
 import org.elasticsearch.xpack.esql.expression.function.fulltext.Match;
@@ -53,10 +54,10 @@
 import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.NUM_CANDS_FIELD;
 import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable;
-import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isMapExpression;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
@@ -198,7 +199,7 @@ public DataType dataType() {
 
     @Override
     protected TypeResolution resolveParams() {
-        return resolveField().and(resolveQuery()).and(resolveK()).and(resolveOptions());
+        return resolveField().and(resolveQuery()).and(resolveK()).and(Options.resolve(options(), source(), FOURTH, ALLOWED_OPTIONS));
     }
 
     private TypeResolution resolveField() {
@@ -221,37 +222,6 @@ private TypeResolution resolveK() {
             .and(isNotNull(k(), sourceText(), THIRD));
     }
 
-    private TypeResolution resolveOptions() {
-        if (options() != null) {
-            TypeResolution resolution = isNotNull(options(), sourceText(), TypeResolutions.ParamOrdinal.FOURTH);
-            if (resolution.unresolved()) {
-                return resolution;
-            }
-            // MapExpression does not have a DataType associated with it
-            resolution = isMapExpression(options(), sourceText(), TypeResolutions.ParamOrdinal.FOURTH);
-            if (resolution.unresolved()) {
-                return resolution;
-            }
-
-            try {
-                knnQueryOptions();
-            } catch (InvalidArgumentException e) {
-                return new TypeResolution(e.getMessage());
-            }
-        }
-        return TypeResolution.TYPE_RESOLVED;
-    }
-
-    private Map<String, Object> knnQueryOptions() throws InvalidArgumentException {
-        if (options() == null) {
-            return Map.of();
-        }
-
-        Map<String, Object> matchOptions = new HashMap<>();
-        populateOptionsMap((MapExpression) options(), matchOptions, TypeResolutions.ParamOrdinal.FOURTH, sourceText(), ALLOWED_OPTIONS);
-        return matchOptions;
-    }
-
     @Override
     public Expression replaceQueryBuilder(QueryBuilder queryBuilder) {
         return new Knn(source(), field(), query(), k(), options(), queryBuilder, filterExpressions());
@@ -307,7 +277,7 @@ public Expression withFilters(List<Expression> filterExpressions) {
     private Map<String, Object> queryOptions() throws InvalidArgumentException {
         Map<String, Object> options = new HashMap<>();
         if (options() != null) {
-            populateOptionsMap((MapExpression) options(), options, TypeResolutions.ParamOrdinal.FOURTH, sourceText(), ALLOWED_OPTIONS);
+            Options.populateMap((MapExpression) options(), options, source(), FOURTH, ALLOWED_OPTIONS);
         }
         return options;
     }
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java
index dd7ee26aa84bd..8fe9ccc18c006 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java
@@ -10,6 +10,7 @@
 import org.elasticsearch.xpack.esql.core.expression.Alias;
 import org.elasticsearch.xpack.esql.core.expression.Attribute;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.MapExpression;
 import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
 import org.elasticsearch.xpack.esql.core.util.Holder;
 import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
@@ -137,13 +138,13 @@ private static Expression transformNonEvaluatableGroupingFunction(
 
         List<Expression> newChildren = new ArrayList<>(gf.children().size());
         for (Expression ex : gf.children()) {
-            if (ex instanceof Attribute == false) { // TODO: foldables shouldn't require eval'ing either
+            if (ex instanceof Attribute || ex instanceof MapExpression) {
+                newChildren.add(ex);
+            } else { // TODO: foldables shouldn't require eval'ing either
                 var alias = new Alias(ex.source(), syntheticName(ex, gf, counter++), ex, null, true);
                 evals.add(alias);
                 newChildren.add(alias.toAttribute());
                 childrenChanged = true;
-            } else {
-                newChildren.add(ex);
             }
         }
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java
index a5d19fcc3fb14..e45fe2b0e81d8 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java
@@ -343,8 +343,12 @@ BlockHash.GroupSpec toHashGroupSpec() {
             if (channel == null) {
                 throw new EsqlIllegalArgumentException("planned to use ordinals but tried to use the hash instead");
             }
-
-            return new BlockHash.GroupSpec(channel, elementType(), Alias.unwrap(expression) instanceof Categorize, null);
+            return new BlockHash.GroupSpec(
+                channel,
+                elementType(),
+                Alias.unwrap(expression) instanceof Categorize categorize ? categorize.categorizeDef() : null,
+                null
+            );
         }
 
         ElementType elementType() {
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java
index 4951479514b72..5d4260eb4ee66 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java
@@ -1972,6 +1972,57 @@ public void testCategorizeWithFilteredAggregations() {
         );
     }
 
+    public void testCategorizeInvalidOptionsField() {
+        assumeTrue("categorize options must be enabled", EsqlCapabilities.Cap.CATEGORIZE_OPTIONS.isEnabled());
+
+        assertEquals(
+            "1:31: second argument of [CATEGORIZE(last_name, first_name)] must be a map expression, received [first_name]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, first_name)")
+        );
+        assertEquals(
+            "1:31: Invalid option [blah] in [CATEGORIZE(last_name, { \"blah\": 42 })], "
+                + "expected one of [analyzer, output_format, similarity_threshold]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"blah\": 42 })")
+        );
+    }
+
+    public void testCategorizeOptionOutputFormat() {
+        assumeTrue("categorize options must be enabled", EsqlCapabilities.Cap.CATEGORIZE_OPTIONS.isEnabled());
+
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": \"regex\" })");
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": \"REGEX\" })");
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": \"tokens\" })");
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": \"ToKeNs\" })");
+        assertEquals(
+            "1:31: invalid output format [blah], expecting one of [REGEX, TOKENS]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": \"blah\" })")
+        );
+        assertEquals(
+            "1:31: invalid output format [42], expecting one of [REGEX, TOKENS]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"output_format\": 42 })")
+        );
+    }
+
+    public void testCategorizeOptionSimilarityThreshold() {
+        assumeTrue("categorize options must be enabled", EsqlCapabilities.Cap.CATEGORIZE_OPTIONS.isEnabled());
+
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"similarity_threshold\": 1 })");
+        query("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"similarity_threshold\": 100 })");
+        assertEquals(
+            "1:31: invalid similarity threshold [0], expecting a number between 1 and 100, inclusive",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"similarity_threshold\": 0 })")
+        );
+        assertEquals(
+            "1:31: invalid similarity threshold [101], expecting a number between 1 and 100, inclusive",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"similarity_threshold\": 101 })")
+        );
+        assertEquals(
+            "1:31: Invalid option [similarity_threshold] in [CATEGORIZE(last_name, { \"similarity_threshold\": \"blah\" })], "
+                + "cannot cast [blah] to [integer]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"similarity_threshold\": \"blah\" })")
+        );
+    }
+
     public void testChangePoint() {
         assumeTrue("change_point must be enabled", EsqlCapabilities.Cap.CHANGE_POINT.isEnabled());
         var airports = AnalyzerTestUtils.analyzer(loadMapping("mapping-airports.json", "airports"));
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeErrorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeErrorTests.java
index f674f9b2c3d72..97d5b8e3ece96 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeErrorTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeErrorTests.java
@@ -27,7 +27,7 @@ protected List<TestCaseSupplier> cases() {
 
     @Override
     protected Expression build(Source source, List<Expression> args) {
-        return new Categorize(source, args.get(0));
+        return new Categorize(source, args.get(0), args.size() > 1 ? args.get(1) : null);
     }
 
     @Override
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java
index f69bb7eb3e7bb..296d624ee1777 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java
@@ -61,7 +61,7 @@ public static Iterable<Object[]> parameters() {
 
     @Override
     protected Expression build(Source source, List<Expression> args) {
-        return new Categorize(source, args.get(0));
+        return new Categorize(source, args.get(0), args.size() > 1 ? args.get(1) : null);
     }
 
     @Override
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNullTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNullTests.java
index 96e26fbd37a4c..ae30dce97ce5a 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNullTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNullTests.java
@@ -269,7 +269,7 @@ public void testNullBucketGetsFolded() {
     }
 
     public void testNullCategorizeGroupingNotFolded() {
-        Categorize categorize = new Categorize(EMPTY, NULL);
+        Categorize categorize = new Categorize(EMPTY, NULL, NULL);
         assertEquals(categorize, foldNull(categorize));
     }
 
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/SerializableTokenListCategory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/SerializableTokenListCategory.java
index 5686f3734f36e..47e20d1a56bf2 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/SerializableTokenListCategory.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/SerializableTokenListCategory.java
@@ -162,6 +162,13 @@ public BytesRef[] getKeyTokens() {
         return Arrays.stream(keyTokenIndexes).mapToObj(index -> baseTokens[index]).toArray(BytesRef[]::new);
     }
 
+    public String getKeyTokensString() {
+        return Arrays.stream(keyTokenIndexes)
+            .mapToObj(index -> baseTokens[index])
+            .map(BytesRef::utf8ToString)
+            .collect(Collectors.joining(" "));
+    }
+
     public String getRegex() {
         if (keyTokenIndexes.length == 0 || orderedCommonTokenBeginIndex == orderedCommonTokenEndIndex) {
             return ".*";

From 8d434cc0c45a7fc7077a73a10ead6e425a6ca717 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?=
Date: Thu, 17 Jul 2025 11:37:09 +0200
Subject: [PATCH 5/7] Migrate x-pack-autoscaling REST tests (#131365)
This PR migrates the legacy REST tests in the x-pack autoscaling module
to the internal yaml-rest-test setup.
---
 .../internal/RestrictedBuildApiService.java   |  2 --
 x-pack/plugin/autoscaling/build.gradle        | 23 +++++++++------
 x-pack/plugin/autoscaling/qa/build.gradle     |  0
 .../plugin/autoscaling/qa/rest/build.gradle   | 28 -------------------
 .../xpack/autoscaling/AutoscalingRestIT.java  | 19 +++++++++++--
 .../resources}/autoscaling-roles.yml          |  0
 .../autoscaling/delete_autoscaling_policy.yml |  0
 .../autoscaling/get_autoscaling_capacity.yml  |  0
 .../autoscaling/get_autoscaling_policy.yml    |  0
 .../autoscaling/put_autoscaling_policy.yml    |  0
 10 files changed, 32 insertions(+), 40 deletions(-)
 delete mode 100644 x-pack/plugin/autoscaling/qa/build.gradle
 delete mode 100644 x-pack/plugin/autoscaling/qa/rest/build.gradle
 rename x-pack/plugin/autoscaling/{qa/rest => }/src/yamlRestTest/java/org/elasticsearch/xpack/autoscaling/AutoscalingRestIT.java (67%)
 rename x-pack/plugin/autoscaling/{qa/rest => src/yamlRestTest/resources}/autoscaling-roles.yml (100%)
 rename x-pack/plugin/autoscaling/{qa/rest => }/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/delete_autoscaling_policy.yml (100%)
 rename x-pack/plugin/autoscaling/{qa/rest => }/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/get_autoscaling_capacity.yml (100%)
 rename x-pack/plugin/autoscaling/{qa/rest => }/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/get_autoscaling_policy.yml (100%)
 rename x-pack/plugin/autoscaling/{qa/rest => }/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/put_autoscaling_policy.yml (100%)

diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RestrictedBuildApiService.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RestrictedBuildApiService.java
index 4f3c4b3d94f68..205930133156c 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RestrictedBuildApiService.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/RestrictedBuildApiService.java
@@ -56,8 +56,6 @@ private static ListMultimap<Class<?>, String> createLegacyRestTestBasePluginUsag
         map.put(LegacyRestTestBasePlugin.class, ":x-pack:qa:third-party:jira");
         map.put(LegacyRestTestBasePlugin.class, ":x-pack:qa:third-party:pagerduty");
         map.put(LegacyRestTestBasePlugin.class, ":x-pack:qa:third-party:slack");
-        map.put(LegacyRestTestBasePlugin.class, ":x-pack:plugin:async-search:qa:rest");
-        map.put(LegacyRestTestBasePlugin.class, ":x-pack:plugin:autoscaling:qa:rest");
         map.put(LegacyRestTestBasePlugin.class, ":x-pack:plugin:deprecation:qa:early-deprecation-rest");
         map.put(LegacyRestTestBasePlugin.class, ":x-pack:plugin:deprecation:qa:rest");
         map.put(LegacyRestTestBasePlugin.class, ":x-pack:plugin:downsample:qa:with-security");
diff --git a/x-pack/plugin/autoscaling/build.gradle b/x-pack/plugin/autoscaling/build.gradle
index 24400a0fc418e..22a43654fd602 100644
--- a/x-pack/plugin/autoscaling/build.gradle
+++ b/x-pack/plugin/autoscaling/build.gradle
@@ -1,5 +1,6 @@
 apply plugin: 'elasticsearch.internal-es-plugin'
 apply plugin: 'elasticsearch.internal-cluster-test'
+apply plugin: 'elasticsearch.internal-yaml-rest-test'
 
 esplugin {
   name = 'x-pack-autoscaling'
@@ -15,15 +16,21 @@ base {
 
 dependencies {
   compileOnly project(path: xpackModule('core'))
-  testImplementation(testArtifact(project(xpackModule('core'))))
-  testImplementation project(':modules:data-streams')
-  testImplementation project(path: xpackModule('blob-cache'))
-  testImplementation project(path: xpackModule('searchable-snapshots'))
-  testImplementation project(path: xpackModule('ilm'))
-  testImplementation project(path: xpackModule('slm'))
-  testImplementation project(path: xpackModule('ccr'))
+  testImplementation testArtifact(project(':server'))
+  testImplementation testArtifact(project(xpackModule('core')))
   testImplementation "com.fasterxml.jackson.core:jackson-databind:${versions.jackson}"
+
+  internalClusterTestImplementation project(':modules:data-streams')
+  internalClusterTestImplementation project(xpackModule('blob-cache'))
+  internalClusterTestImplementation project(xpackModule("searchable-snapshots"))
+  internalClusterTestImplementation project(xpackModule('ilm'))
+  internalClusterTestImplementation project(xpackModule('slm'))
+  internalClusterTestImplementation project(xpackModule('ccr'))
 }
 
-addQaCheckDependencies(project)
+restResources {
+  restApi {
+    include '_common', 'autoscaling'
+  }
+}
diff --git a/x-pack/plugin/autoscaling/qa/build.gradle b/x-pack/plugin/autoscaling/qa/build.gradle
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/x-pack/plugin/autoscaling/qa/rest/build.gradle b/x-pack/plugin/autoscaling/qa/rest/build.gradle
deleted file mode 100644
index 903e76fd986cf..0000000000000
--- a/x-pack/plugin/autoscaling/qa/rest/build.gradle
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-apply plugin: 'elasticsearch.legacy-yaml-rest-test'
-apply plugin: 'elasticsearch.legacy-yaml-rest-compat-test'
-
-dependencies {
-  yamlRestTestImplementation(testArtifact(project(xpackModule('core'))))
-}
-
-restResources {
-  restApi {
-    include '_common', 'autoscaling'
-  }
-}
-
-testClusters.configureEach {
-  testDistribution = 'DEFAULT'
-  setting 'xpack.security.enabled', 'true'
-  setting 'xpack.license.self_generated.type', 'trial'
-  extraConfigFile 'roles.yml', file('autoscaling-roles.yml')
-  user username: 'autoscaling-admin', password: 'autoscaling-admin-password', role: 'superuser'
-  user username: 'autoscaling-user', password: 'autoscaling-user-password', role: 'autoscaling'
-}
diff --git a/x-pack/plugin/autoscaling/qa/rest/src/yamlRestTest/java/org/elasticsearch/xpack/autoscaling/AutoscalingRestIT.java b/x-pack/plugin/autoscaling/src/yamlRestTest/java/org/elasticsearch/xpack/autoscaling/AutoscalingRestIT.java
similarity index 67%
rename from x-pack/plugin/autoscaling/qa/rest/src/yamlRestTest/java/org/elasticsearch/xpack/autoscaling/AutoscalingRestIT.java
rename to x-pack/plugin/autoscaling/src/yamlRestTest/java/org/elasticsearch/xpack/autoscaling/AutoscalingRestIT.java
index 89bc24ecc1ed0..15bef71fe1ea5 100644
--- a/x-pack/plugin/autoscaling/qa/rest/src/yamlRestTest/java/org/elasticsearch/xpack/autoscaling/AutoscalingRestIT.java
+++ b/x-pack/plugin/autoscaling/src/yamlRestTest/java/org/elasticsearch/xpack/autoscaling/AutoscalingRestIT.java
@@ -12,13 +12,24 @@
 import org.elasticsearch.common.settings.SecureString;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.concurrent.ThreadContext;
+import org.elasticsearch.test.cluster.ElasticsearchCluster;
+import org.elasticsearch.test.cluster.util.resource.Resource;
 import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate;
 import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase;
-
-import static org.elasticsearch.xpack.core.security.authc.support.UsernamePasswordToken.basicAuthHeaderValue;
+import org.junit.ClassRule;
 
 public class AutoscalingRestIT extends ESClientYamlSuiteTestCase {
 
+    @ClassRule
+    public static ElasticsearchCluster cluster = ElasticsearchCluster.local()
+        .module("x-pack-autoscaling")
+        .setting("xpack.security.enabled", "true")
+        .setting("xpack.license.self_generated.type", "trial")
+        .rolesFile(Resource.fromClasspath("autoscaling-roles.yml"))
+        .user("autoscaling-admin", "autoscaling-admin-password", "superuser", false)
+        .user("autoscaling-user", "autoscaling-user-password", "autoscaling", false)
+        .build();
+
     public AutoscalingRestIT(final ClientYamlTestCandidate testCandidate) {
         super(testCandidate);
     }
@@ -40,4 +51,8 @@ protected Settings restClientSettings() {
         return Settings.builder().put(ThreadContext.PREFIX + ".Authorization", value).build();
     }
 
+    @Override
+    protected String getTestRestCluster() {
+        return cluster.getHttpAddresses();
+    }
 }
diff --git a/x-pack/plugin/autoscaling/qa/rest/autoscaling-roles.yml b/x-pack/plugin/autoscaling/src/yamlRestTest/resources/autoscaling-roles.yml
similarity index 100%
rename from x-pack/plugin/autoscaling/qa/rest/autoscaling-roles.yml
rename to x-pack/plugin/autoscaling/src/yamlRestTest/resources/autoscaling-roles.yml
diff --git a/x-pack/plugin/autoscaling/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/delete_autoscaling_policy.yml b/x-pack/plugin/autoscaling/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/delete_autoscaling_policy.yml
similarity index 100%
rename from x-pack/plugin/autoscaling/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/delete_autoscaling_policy.yml
rename to x-pack/plugin/autoscaling/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/delete_autoscaling_policy.yml
diff --git a/x-pack/plugin/autoscaling/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/get_autoscaling_capacity.yml b/x-pack/plugin/autoscaling/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/get_autoscaling_capacity.yml
similarity index 100%
rename from x-pack/plugin/autoscaling/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/get_autoscaling_capacity.yml
rename to x-pack/plugin/autoscaling/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/get_autoscaling_capacity.yml
diff --git a/x-pack/plugin/autoscaling/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/get_autoscaling_policy.yml b/x-pack/plugin/autoscaling/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/get_autoscaling_policy.yml
similarity index 100%
rename from x-pack/plugin/autoscaling/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/get_autoscaling_policy.yml
rename to x-pack/plugin/autoscaling/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/get_autoscaling_policy.yml
diff --git a/x-pack/plugin/autoscaling/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/put_autoscaling_policy.yml b/x-pack/plugin/autoscaling/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/put_autoscaling_policy.yml
similarity index 100%
rename from x-pack/plugin/autoscaling/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/put_autoscaling_policy.yml
rename to x-pack/plugin/autoscaling/src/yamlRestTest/resources/rest-api-spec/test/autoscaling/put_autoscaling_policy.yml
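
With the module migrated off the legacy plugin, the suite should run through the
standard task wired up by internal-yaml-rest-test; assuming the usual project
path conventions, something like:

    ./gradlew :x-pack:plugin:autoscaling:yamlRestTest
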
From 7dd4af3aaf9854d9c5e7cd40c41af4643bafcb9d Mon Sep 17 00:00:00 2001
From: Quentin Pradet
Date: Thu, 17 Jul 2025 15:51:45 +0400
Subject: [PATCH 6/7] Remove 'index' from snapshot clear_cache query params
 (#131067)

It's already part of the path parts, so it's not useful to duplicate it
in the query parameters.
---
 .../rest-api-spec/api/searchable_snapshots.clear_cache.json | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/searchable_snapshots.clear_cache.json b/rest-api-spec/src/main/resources/rest-api-spec/api/searchable_snapshots.clear_cache.json
index d2d7000195c04..8b39cdcb24218 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/searchable_snapshots.clear_cache.json
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/searchable_snapshots.clear_cache.json
@@ -50,10 +50,6 @@
       ],
       "default": "open",
       "description": "Whether to expand wildcard expression to concrete indices that are open, closed or both."
-      },
-      "index": {
-        "type": "list",
-        "description": "A comma-separated list of index name to limit the operation"
       }
     }
   }
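
For context, the index scope of this API comes from the URL path alone, so
nothing is lost by dropping the duplicate query parameter; a request would look
roughly like this (index name invented):

    POST /my-index/_searchable_snapshots/cache/clear
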
From ea0d14b6be0b1afabd41971ae1f6416e683ac045 Mon Sep 17 00:00:00 2001
From: Ignacio Vera
Date: Thu, 17 Jul 2025 13:14:46 +0100
Subject: [PATCH 7/7] [DiskBBQ] Use PackedLongValues to hold offsets on heap
 while writing (#131411)

---
 .../vectors/DefaultIVFVectorsWriter.java      | 25 +++++++++------
 .../index/codec/vectors/IVFVectorsWriter.java | 19 +++++++-------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java
index 726e84605157a..f34ad071b6ba0 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java
@@ -17,8 +17,11 @@
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.LongValues;
 import org.apache.lucene.util.VectorUtil;
 import org.apache.lucene.util.hnsw.IntToIntFunction;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
 import org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans;
 import org.elasticsearch.index.codec.vectors.cluster.KMeansResult;
 import org.elasticsearch.logging.LogManager;
@@ -46,7 +49,7 @@ public DefaultIVFVectorsWriter(SegmentWriteState state, FlatVectorsWriter rawVec
     }
 
     @Override
-    long[] buildAndWritePostingsLists(
+    LongValues buildAndWritePostingsLists(
         FieldInfo fieldInfo,
         CentroidSupplier centroidSupplier,
         FloatVectorValues floatVectorValues,
@@ -81,7 +84,7 @@ long[] buildAndWritePostingsLists(
             }
         }
         // write the posting lists
-        final long[] offsets = new long[centroidSupplier.size()];
+        final PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
         DocIdsWriter docIdsWriter = new DocIdsWriter();
         DiskBBQBulkWriter bulkWriter = new DiskBBQBulkWriter.OneBitDiskBBQBulkWriter(ES91OSQVectorsScorer.BULK_SIZE, postingsOutput);
         OnHeapQuantizedVectors onHeapQuantizedVectors = new OnHeapQuantizedVectors(
@@ -93,7 +96,7 @@ long[] buildAndWritePostingsLists(
             float[] centroid = centroidSupplier.centroid(c);
             int[] cluster = assignmentsByCluster[c];
             // TODO align???
-            offsets[c] = postingsOutput.getFilePointer();
+            offsets.add(postingsOutput.getFilePointer());
             int size = cluster.length;
             postingsOutput.writeVInt(size);
             postingsOutput.writeInt(Float.floatToIntBits(VectorUtil.dotProduct(centroid, centroid)));
@@ -109,11 +112,11 @@ long[] buildAndWritePostingsLists(
             printClusterQualityStatistics(assignmentsByCluster);
         }
 
-        return offsets;
+        return offsets.build();
     }
 
     @Override
-    long[] buildAndWritePostingsLists(
+    LongValues buildAndWritePostingsLists(
         FieldInfo fieldInfo,
         CentroidSupplier centroidSupplier,
         FloatVectorValues floatVectorValues,
@@ -199,7 +202,7 @@ long[] buildAndWritePostingsLists(
         }
         // now we can read the quantized vectors from the temporary file
         try (IndexInput quantizedVectorsInput = mergeState.segmentInfo.dir.openInput(quantizedVectorsTempName, IOContext.DEFAULT)) {
-            final long[] offsets = new long[centroidSupplier.size()];
+            final PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
             OffHeapQuantizedVectors offHeapQuantizedVectors = new OffHeapQuantizedVectors(
                 quantizedVectorsInput,
                 fieldInfo.getVectorDimension()
@@ -210,9 +213,9 @@ long[] buildAndWritePostingsLists(
                 float[] centroid = centroidSupplier.centroid(c);
                 int[] cluster = assignmentsByCluster[c];
                 boolean[] isOverspill = isOverspillByCluster[c];
-                // TODO align???
-                offsets[c] = postingsOutput.getFilePointer();
+                offsets.add(postingsOutput.getFilePointer());
                 int size = cluster.length;
+                // TODO align???
                 postingsOutput.writeVInt(size);
                 postingsOutput.writeInt(Float.floatToIntBits(VectorUtil.dotProduct(centroid, centroid)));
                 offHeapQuantizedVectors.reset(size, ord -> isOverspill[ord], ord -> cluster[ord]);
@@ -226,7 +229,7 @@ long[] buildAndWritePostingsLists(
         if (logger.isDebugEnabled()) {
             printClusterQualityStatistics(assignmentsByCluster);
         }
-        return offsets;
+        return offsets.build();
     }
 }
@@ -270,7 +273,7 @@ void writeCentroids(
         FieldInfo fieldInfo,
         CentroidSupplier centroidSupplier,
         float[] globalCentroid,
-        long[] offsets,
+        LongValues offsets,
         IndexOutput centroidOutput
     ) throws IOException {
@@ -302,7 +305,7 @@ void writeCentroids(
             // write the centroids
             centroidOutput.writeBytes(buffer.array(), buffer.array().length);
             // write the offset of this posting list
-            centroidOutput.writeLong(offsets[i]);
+            centroidOutput.writeLong(offsets.get(i));
         }
     }
diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java
index f828c96f7a9e1..149db2eb96b83 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java
@@ -28,6 +28,7 @@
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RandomAccessInput;
+import org.apache.lucene.util.LongValues;
 import org.apache.lucene.util.VectorUtil;
 import org.elasticsearch.core.IOUtils;
 import org.elasticsearch.core.SuppressForbidden;
@@ -126,11 +127,11 @@ abstract void writeCentroids(
         FieldInfo fieldInfo,
         CentroidSupplier centroidSupplier,
         float[] globalCentroid,
-        long[] centroidOffset,
+        LongValues centroidOffset,
         IndexOutput centroidOutput
     ) throws IOException;
 
-    abstract long[] buildAndWritePostingsLists(
+    abstract LongValues buildAndWritePostingsLists(
         FieldInfo fieldInfo,
         CentroidSupplier centroidSupplier,
         FloatVectorValues floatVectorValues,
@@ -139,7 +140,7 @@
         int[] overspillAssignments
     ) throws IOException;
 
-    abstract long[] buildAndWritePostingsLists(
+    abstract LongValues buildAndWritePostingsLists(
         FieldInfo fieldInfo,
         CentroidSupplier centroidSupplier,
         FloatVectorValues floatVectorValues,
@@ -160,7 +161,7 @@ abstract CentroidSupplier createCentroidSupplier(
     public final void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
         rawVectorDelegate.flush(maxDoc, sortMap);
         for (FieldWriter fieldWriter : fieldWriters) {
-            float[] globalCentroid = new float[fieldWriter.fieldInfo.getVectorDimension()];
+            final float[] globalCentroid = new float[fieldWriter.fieldInfo.getVectorDimension()];
             // build a float vector values with random access
            final FloatVectorValues floatVectorValues = getFloatVectorValues(fieldWriter.fieldInfo, fieldWriter.delegate, maxDoc);
             // build centroids
@@ -168,7 +169,7 @@ public final void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
             // wrap centroids with a supplier
             final CentroidSupplier centroidSupplier = new OnHeapCentroidSupplier(centroidAssignments.centroids());
             // write posting lists
-            final long[] offsets = buildAndWritePostingsLists(
+            final LongValues offsets = buildAndWritePostingsLists(
                 fieldWriter.fieldInfo,
                 centroidSupplier,
                 floatVectorValues,
@@ -176,9 +177,8 @@ public final void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
                 centroidAssignments.assignments(),
                 centroidAssignments.overspillAssignments()
             );
-            assert offsets.length == centroidSupplier.size();
-            final long centroidOffset = ivfCentroids.alignFilePointer(Float.BYTES);
             // write centroids
+            final long centroidOffset = ivfCentroids.alignFilePointer(Float.BYTES);
             writeCentroids(fieldWriter.fieldInfo, centroidSupplier, globalCentroid, offsets, ivfCentroids);
             final long centroidLength = ivfCentroids.getFilePointer() - centroidOffset;
             // write meta file
@@ -338,7 +338,7 @@ private void mergeOneFieldIVF(FieldInfo fieldInfo, MergeState mergeState) throws
                 calculatedGlobalCentroid
             );
             // write posting lists
-            final long[] offsets = buildAndWritePostingsLists(
+            final LongValues offsets = buildAndWritePostingsLists(
                 fieldInfo,
                 centroidSupplier,
                 floatVectorValues,
@@ -347,9 +347,8 @@ private void mergeOneFieldIVF(FieldInfo fieldInfo, MergeState mergeState) throws
                 assignments,
                 overspillAssignments
             );
-            assert offsets.length == centroidSupplier.size();
-            centroidOffset = ivfCentroids.alignFilePointer(Float.BYTES);
             // write centroids
+            centroidOffset = ivfCentroids.alignFilePointer(Float.BYTES);
             writeCentroids(fieldInfo, centroidSupplier, calculatedGlobalCentroid, offsets, ivfCentroids);
             centroidLength = ivfCentroids.getFilePointer() - centroidOffset;
             // write meta
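
The PackedLongValues change above is easiest to see in isolation. Below is a
minimal, self-contained sketch of the pattern (illustrative only — the class
name and byte counts are invented; the Lucene types and calls are the same ones
the patch imports):

    import org.apache.lucene.util.LongValues;
    import org.apache.lucene.util.packed.PackedInts;
    import org.apache.lucene.util.packed.PackedLongValues;

    public class OffsetsSketch {
        public static void main(String[] args) {
            // Posting lists are written sequentially, so their start offsets form a
            // monotonically increasing sequence; the monotonic builder delta-encodes
            // it, which is much cheaper on heap than one long per centroid.
            PackedLongValues.Builder offsets = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
            long filePointer = 0;
            for (int cluster = 0; cluster < 4; cluster++) {
                offsets.add(filePointer);           // record where this cluster's posting list starts
                filePointer += 128 + cluster * 16L; // pretend a posting list of this size was written
            }
            // build() freezes the sequence into a read-only structure; LongValues is
            // the supertype the writer now hands to writeCentroids.
            LongValues frozen = offsets.build();
            for (int cluster = 0; cluster < 4; cluster++) {
                System.out.println("cluster " + cluster + " starts at " + frozen.get(cluster));
            }
        }
    }

The trade-off, visible in both writers above, is that the builder is
append-only: offsets must be added in cluster order inside the write loop
rather than assigned by index.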