diff --git a/docs/changelog/135434.yaml b/docs/changelog/135434.yaml
new file mode 100644
index 0000000000000..0a1506087a427
--- /dev/null
+++ b/docs/changelog/135434.yaml
@@ -0,0 +1,6 @@
+pr: 135434
+summary: Support extra field in TOP function
+area: ES|QL
+type: enhancement
+issues:
+ - 128630
diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/top.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/top.md
index 057bd91855a1a..c56bf558809d1 100644
--- a/docs/reference/query-languages/esql/_snippets/functions/parameters/top.md
+++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/top.md
@@ -11,3 +11,6 @@
`order`
: The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted.
+`outputField`
+: The extra field that, if present, will be the output of the TOP call instead of `field`.
+
diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/top.md b/docs/reference/query-languages/esql/_snippets/functions/types/top.md
index 559a779cd1d9d..063fb157980d0 100644
--- a/docs/reference/query-languages/esql/_snippets/functions/types/top.md
+++ b/docs/reference/query-languages/esql/_snippets/functions/types/top.md
@@ -2,22 +2,38 @@
**Supported types**
-| field | limit | order | result |
-| --- | --- | --- | --- |
-| boolean | integer | keyword | boolean |
-| boolean | integer | | boolean |
-| date | integer | keyword | date |
-| date | integer | | date |
-| double | integer | keyword | double |
-| double | integer | | double |
-| integer | integer | keyword | integer |
-| integer | integer | | integer |
-| ip | integer | keyword | ip |
-| ip | integer | | ip |
-| keyword | integer | keyword | keyword |
-| keyword | integer | | keyword |
-| long | integer | keyword | long |
-| long | integer | | long |
-| text | integer | keyword | keyword |
-| text | integer | | keyword |
+| field | limit | order | outputField | result |
+| --- | --- | --- | --- | --- |
+| boolean | integer | keyword | | boolean |
+| boolean | integer | | | boolean |
+| date | integer | keyword | date | date |
+| date | integer | keyword | double | double |
+| date | integer | keyword | integer | integer |
+| date | integer | keyword | long | long |
+| date | integer | keyword | | date |
+| date | integer | | | date |
+| double | integer | keyword | date | date |
+| double | integer | keyword | double | double |
+| double | integer | keyword | integer | integer |
+| double | integer | keyword | long | long |
+| double | integer | keyword | | double |
+| double | integer | | | double |
+| integer | integer | keyword | date | date |
+| integer | integer | keyword | double | double |
+| integer | integer | keyword | integer | integer |
+| integer | integer | keyword | long | long |
+| integer | integer | keyword | | integer |
+| integer | integer | | | integer |
+| ip | integer | keyword | | ip |
+| ip | integer | | | ip |
+| keyword | integer | keyword | | keyword |
+| keyword | integer | | | keyword |
+| long | integer | keyword | date | date |
+| long | integer | keyword | double | double |
+| long | integer | keyword | integer | integer |
+| long | integer | keyword | long | long |
+| long | integer | keyword | | long |
+| long | integer | | | long |
+| text | integer | keyword | | keyword |
+| text | integer | | | keyword |
diff --git a/docs/reference/query-languages/esql/images/functions/top.svg b/docs/reference/query-languages/esql/images/functions/top.svg
index 947890a49f31c..1987d050f981a 100644
--- a/docs/reference/query-languages/esql/images/functions/top.svg
+++ b/docs/reference/query-languages/esql/images/functions/top.svg
@@ -1 +1 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/top.json b/docs/reference/query-languages/esql/kibana/definition/functions/top.json
index 8cef8d534764e..6308eed3bff61 100644
--- a/docs/reference/query-languages/esql/kibana/definition/functions/top.json
+++ b/docs/reference/query-languages/esql/kibana/definition/functions/top.json
@@ -88,6 +88,126 @@
"variadic" : false,
"returnType" : "date"
},
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "date",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "date",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "date"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "date",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "double",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "date",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "integer",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "integer"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "date",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "long",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "long"
+ },
{
"params" : [
{
@@ -130,6 +250,126 @@
"variadic" : false,
"returnType" : "double"
},
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "double",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "date",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "date"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "double",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "double",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "double",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "integer",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "integer"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "double",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "long",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "long"
+ },
{
"params" : [
{
@@ -172,6 +412,126 @@
"variadic" : false,
"returnType" : "integer"
},
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "date",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "date"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "double",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "integer",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "integer"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "long",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "long"
+ },
{
"params" : [
{
@@ -298,6 +658,126 @@
"variadic" : false,
"returnType" : "long"
},
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "long",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "date",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "date"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "long",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "double",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "double"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "long",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "integer",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "integer"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "long",
+ "optional" : false,
+ "description" : "The field to collect the top values for."
+ },
+ {
+ "name" : "limit",
+ "type" : "integer",
+ "optional" : false,
+ "description" : "The maximum number of values to collect."
+ },
+ {
+ "name" : "order",
+ "type" : "keyword",
+ "optional" : true,
+ "description" : "The order to calculate the top values. Either `asc` or `desc`, and defaults to `asc` if omitted."
+ },
+ {
+ "name" : "outputField",
+ "type" : "long",
+ "optional" : true,
+ "description" : "The extra field that, if present, will be the output of the TOP call instead of `field`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "long"
+ },
{
"params" : [
{
diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java
index 56bbf69c76e55..50a8c46018b08 100644
--- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java
+++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java
@@ -211,6 +211,18 @@ public static TypeResolution isType(
ParamOrdinal paramOrd,
boolean allowUnionTypes,
String... acceptedTypes
+ ) {
+ return isType(e, predicate, null, operationName, paramOrd, allowUnionTypes, acceptedTypes);
+ }
+
+ public static TypeResolution isType(
+ Expression e,
+ Predicate predicate,
+ String errorMessagePrefix,
+ String operationName,
+ ParamOrdinal paramOrd,
+ boolean allowUnionTypes,
+ String... acceptedTypes
) {
if (predicate.test(e.dataType()) || e.dataType() == NULL) {
return TypeResolution.TYPE_RESOLVED;
@@ -225,11 +237,19 @@ public static TypeResolution isType(
}
return new TypeResolution(
- errorStringIncompatibleTypes(operationName, paramOrd, name(e), e.dataType(), acceptedTypesForErrorMsg(acceptedTypes))
+ errorStringIncompatibleTypes(
+ errorMessagePrefix,
+ operationName,
+ paramOrd,
+ name(e),
+ e.dataType(),
+ acceptedTypesForErrorMsg(acceptedTypes)
+ )
);
}
private static String errorStringIncompatibleTypes(
+ String errorMessagePrefix,
String operationName,
ParamOrdinal paramOrd,
String argumentName,
@@ -237,7 +257,7 @@ private static String errorStringIncompatibleTypes(
String... acceptedTypes
) {
return format(
- null,
+ errorMessagePrefix,
"{}argument of [{}] must be [{}], found value [{}] type [{}]",
paramOrd == null || paramOrd == DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
operationName,
diff --git a/x-pack/plugin/esql/compute/build.gradle b/x-pack/plugin/esql/compute/build.gradle
index bd4bb33873be5..44f9bdd331238 100644
--- a/x-pack/plugin/esql/compute/build.gradle
+++ b/x-pack/plugin/esql/compute/build.gradle
@@ -51,7 +51,7 @@ spotless {
* Generated files go here.
*/
toggleOffOn('begin generated imports', 'end generated imports')
- targetExclude "src/main/generated/**/*.java"
+ targetExclude "src/main/generated*/**/*.java"
}
}
@@ -81,6 +81,26 @@ def prop(Name, Type, type, Wrapper, TYPE, BYTES, Array, Hash) {
]
}
+def propWithoutExtra(prop1, extraPrefix) {
+ def res = [ ("has" + extraPrefix): "" ]
+ for ( e in prop1 ) {
+ res.put(e.key, e.value)
+ res.put(extraPrefix + e.key, "")
+ }
+ return res
+}
+
+def propWithExtra(prop1, prop2, extraPrefix) {
+ def res = [ ("has" + extraPrefix): "true" ]
+ for ( e in prop1 ) {
+ res.put(e.key, e.value)
+ }
+ for ( e in prop2 ) {
+ res.put(extraPrefix + e.key, e.value)
+ }
+ return res
+}
+
def addOccurrence(props, Occurrence) {
def newProps = props.collectEntries { [(it.key): it.value] }
newProps["Occurrence"] = Occurrence
@@ -723,40 +743,29 @@ tasks.named('stringTemplates').configure {
}
File topAggregatorInputFile = new File("${projectDir}/src/main/java/org/elasticsearch/compute/aggregation/X-TopAggregator.java.st")
- template {
- it.properties = intProperties
- it.inputFile = topAggregatorInputFile
- it.outputFile = "org/elasticsearch/compute/aggregation/TopIntAggregator.java"
- }
- template {
- it.properties = longProperties
- it.inputFile = topAggregatorInputFile
- it.outputFile = "org/elasticsearch/compute/aggregation/TopLongAggregator.java"
- }
- template {
- it.properties = floatProperties
- it.inputFile = topAggregatorInputFile
- it.outputFile = "org/elasticsearch/compute/aggregation/TopFloatAggregator.java"
- }
- template {
- it.properties = doubleProperties
- it.inputFile = topAggregatorInputFile
- it.outputFile = "org/elasticsearch/compute/aggregation/TopDoubleAggregator.java"
- }
- template {
- it.properties = booleanProperties
- it.inputFile = topAggregatorInputFile
- it.outputFile = "org/elasticsearch/compute/aggregation/TopBooleanAggregator.java"
- }
- template {
- it.properties = bytesRefProperties
- it.inputFile = topAggregatorInputFile
- it.outputFile = "org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java"
+  // Simple TOP where the sort field is also the output field: one generated aggregator per element type
+ [intProperties, longProperties, floatProperties, doubleProperties, booleanProperties, bytesRefProperties, ipProperties].forEach { props ->
+ {
+ template {
+ it.properties = propWithoutExtra(props, "OutputField")
+ it.inputFile = topAggregatorInputFile
+ it.outputFile = "org/elasticsearch/compute/aggregation/Top${props.Name}Aggregator.java"
+ }
+ }
}
- template {
- it.properties = ipProperties
- it.inputFile = topAggregatorInputFile
- it.outputFile = "org/elasticsearch/compute/aggregation/TopIpAggregator.java"
+ // TOP when the sort field and the output field can be *different* fields
+ [intProperties, longProperties, floatProperties, doubleProperties].forEach { props1 ->
+ {
+ [intProperties, longProperties, floatProperties, doubleProperties].forEach { props2 ->
+ {
+ template {
+ it.properties = propWithExtra(props1, props2, "OutputField")
+ it.inputFile = topAggregatorInputFile
+ it.outputFile = "org/elasticsearch/compute/aggregation/Top${props1.Name}${props2.Name}Aggregator.java"
+ }
+ }
+ }
+ }
}
File multivalueDedupeInputFile = file("src/main/java/org/elasticsearch/compute/operator/mvdedupe/X-MultivalueDedupe.java.st")
@@ -896,25 +905,28 @@ tasks.named('stringTemplates').configure {
}
File bucketedSortInputFile = new File("${projectDir}/src/main/java/org/elasticsearch/compute/data/sort/X-BucketedSort.java.st")
- template {
- it.properties = intProperties
- it.inputFile = bucketedSortInputFile
- it.outputFile = "org/elasticsearch/compute/data/sort/IntBucketedSort.java"
- }
- template {
- it.properties = longProperties
- it.inputFile = bucketedSortInputFile
- it.outputFile = "org/elasticsearch/compute/data/sort/LongBucketedSort.java"
- }
- template {
- it.properties = floatProperties
- it.inputFile = bucketedSortInputFile
- it.outputFile = "org/elasticsearch/compute/data/sort/FloatBucketedSort.java"
+ [intProperties, longProperties, floatProperties, doubleProperties].forEach { props ->
+ {
+ template {
+ it.properties = propWithoutExtra(props, "Extra")
+ it.inputFile = bucketedSortInputFile
+ it.outputFile = "org/elasticsearch/compute/data/sort/${props.Name}BucketedSort.java"
+ }
+ }
}
- template {
- it.properties = doubleProperties
- it.inputFile = bucketedSortInputFile
- it.outputFile = "org/elasticsearch/compute/data/sort/DoubleBucketedSort.java"
+  // BucketedSort variants used by TOP when the sort field and the output (extra) field have *different* types
+ [intProperties, longProperties, floatProperties, doubleProperties].forEach { props1 ->
+ {
+ [intProperties, longProperties, floatProperties, doubleProperties].forEach { props2 ->
+ {
+ template {
+ it.properties = propWithExtra(props1, props2, "Extra")
+ it.inputFile = bucketedSortInputFile
+ it.outputFile = "org/elasticsearch/compute/data/sort/${props1.Name}${props2.Name}BucketedSort.java"
+ }
+ }
+ }
+ }
}
File enrichResultBuilderInput = file("src/main/java/org/elasticsearch/compute/operator/lookup/X-EnrichResultBuilder.java.st")
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBooleanAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBooleanAggregator.java
index f93e3095524c4..05964998003ea 100644
--- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBooleanAggregator.java
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBooleanAggregator.java
@@ -18,7 +18,6 @@
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BooleanBlock;
import org.elasticsearch.compute.data.IntVector;
-import org.elasticsearch.compute.data.BooleanBlock;
import org.elasticsearch.compute.data.sort.BooleanBucketedSort;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.core.Releasables;
@@ -118,7 +117,9 @@ public void add(boolean value) {
@Override
public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
- blocks[offset] = toBlock(driverContext.blockFactory());
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
}
Block toBlock(BlockFactory blockFactory) {
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java
index ecc68e7d8a992..88a5d5021983f 100644
--- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java
@@ -18,7 +18,6 @@
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.IntVector;
-import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.sort.BytesRefBucketedSort;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.core.Releasables;
@@ -122,7 +121,9 @@ public void add(BytesRef value) {
@Override
public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
- blocks[offset] = toBlock(driverContext.blockFactory());
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
}
Block toBlock(BlockFactory blockFactory) {
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleAggregator.java
index e9e1803e36fff..df1294c610ed5 100644
--- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleAggregator.java
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleAggregator.java
@@ -18,7 +18,6 @@
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.DoubleBlock;
import org.elasticsearch.compute.data.IntVector;
-import org.elasticsearch.compute.data.DoubleBlock;
import org.elasticsearch.compute.data.sort.DoubleBucketedSort;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.core.Releasables;
@@ -118,7 +117,9 @@ public void add(double value) {
@Override
public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
- blocks[offset] = toBlock(driverContext.blockFactory());
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
}
Block toBlock(BlockFactory blockFactory) {
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleDoubleAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleDoubleAggregator.java
new file mode 100644
index 0000000000000..45cd12e43c182
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleDoubleAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.DoubleDoubleBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N field values for double.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "DOUBLE_BLOCK"), @IntermediateState(name = "output", type = "DOUBLE_BLOCK") })
+@GroupingAggregator
+class TopDoubleDoubleAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, double v, double outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, DoubleBlock values, DoubleBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getDouble(i), outputValues.getDouble(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, double v, double outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, DoubleBlock values, DoubleBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getDouble(i), outputValues.getDouble(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final DoubleDoubleBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new DoubleDoubleBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, double value, double outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(double value, double outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleFloatAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleFloatAggregator.java
new file mode 100644
index 0000000000000..da103d70c0a54
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleFloatAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.FloatBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.DoubleFloatBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N double field values, returning the float output value collected alongside each.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "DOUBLE_BLOCK"), @IntermediateState(name = "output", type = "FLOAT_BLOCK") })
+@GroupingAggregator
+class TopDoubleFloatAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, double v, float outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, DoubleBlock values, FloatBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getDouble(i), outputValues.getFloat(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, double v, float outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, DoubleBlock values, FloatBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getDouble(i), outputValues.getFloat(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final DoubleFloatBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new DoubleFloatBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, double value, float outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(double value, float outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleIntAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleIntAggregator.java
new file mode 100644
index 0000000000000..2b0790c303e59
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleIntAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.DoubleIntBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N double field values, returning the int output value collected alongside each.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "DOUBLE_BLOCK"), @IntermediateState(name = "output", type = "INT_BLOCK") })
+@GroupingAggregator
+class TopDoubleIntAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, double v, int outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, DoubleBlock values, IntBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getDouble(i), outputValues.getInt(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, double v, int outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, DoubleBlock values, IntBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getDouble(i), outputValues.getInt(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final DoubleIntBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new DoubleIntBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, double value, int outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(double value, int outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleLongAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleLongAggregator.java
new file mode 100644
index 0000000000000..57dd898fce1e4
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopDoubleLongAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.DoubleLongBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N double field values, returning the long output value collected alongside each.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "DOUBLE_BLOCK"), @IntermediateState(name = "output", type = "LONG_BLOCK") })
+@GroupingAggregator
+class TopDoubleLongAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, double v, long outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, DoubleBlock values, LongBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getDouble(i), outputValues.getLong(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, double v, long outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, DoubleBlock values, LongBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getDouble(i), outputValues.getLong(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final DoubleLongBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new DoubleLongBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, double value, long outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(double value, long outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatAggregator.java
index 1b5fddc0b0038..25e20ec26ffa0 100644
--- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatAggregator.java
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatAggregator.java
@@ -18,7 +18,6 @@
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.FloatBlock;
import org.elasticsearch.compute.data.IntVector;
-import org.elasticsearch.compute.data.FloatBlock;
import org.elasticsearch.compute.data.sort.FloatBucketedSort;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.core.Releasables;
@@ -118,7 +117,9 @@ public void add(float value) {
@Override
public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
- blocks[offset] = toBlock(driverContext.blockFactory());
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
}
Block toBlock(BlockFactory blockFactory) {
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatDoubleAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatDoubleAggregator.java
new file mode 100644
index 0000000000000..ae4ba17da2529
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatDoubleAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.FloatBlock;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.FloatDoubleBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N float field values, returning the double output value collected alongside each.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "FLOAT_BLOCK"), @IntermediateState(name = "output", type = "DOUBLE_BLOCK") })
+@GroupingAggregator
+class TopFloatDoubleAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, float v, double outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, FloatBlock values, DoubleBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getFloat(i), outputValues.getDouble(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, float v, double outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, FloatBlock values, DoubleBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getFloat(i), outputValues.getDouble(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final FloatDoubleBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new FloatDoubleBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, float value, double outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(float value, double outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatFloatAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatFloatAggregator.java
new file mode 100644
index 0000000000000..e2876b5cddffa
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatFloatAggregator.java
@@ -0,0 +1,139 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.FloatBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.FloatFloatBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N float field values, returning the float output value collected alongside each.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "FLOAT_BLOCK"), @IntermediateState(name = "output", type = "FLOAT_BLOCK") })
+@GroupingAggregator
+class TopFloatFloatAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, float v, float outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, FloatBlock values, FloatBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getFloat(i), outputValues.getFloat(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, float v, float outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, FloatBlock values, FloatBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getFloat(i), outputValues.getFloat(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final FloatFloatBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new FloatFloatBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, float value, float outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(float value, float outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatIntAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatIntAggregator.java
new file mode 100644
index 0000000000000..0de205c508713
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatIntAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.FloatBlock;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.FloatIntBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N float field values, returning the int output value collected alongside each.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "FLOAT_BLOCK"), @IntermediateState(name = "output", type = "INT_BLOCK") })
+@GroupingAggregator
+class TopFloatIntAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, float v, int outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, FloatBlock values, IntBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getFloat(i), outputValues.getInt(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, float v, int outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, FloatBlock values, IntBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getFloat(i), outputValues.getInt(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final FloatIntBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new FloatIntBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, float value, int outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(float value, int outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatLongAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatLongAggregator.java
new file mode 100644
index 0000000000000..580d649abae42
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopFloatLongAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.FloatBlock;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.FloatLongBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N field values for float.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "FLOAT_BLOCK"), @IntermediateState(name = "output", type = "LONG_BLOCK") })
+@GroupingAggregator
+class TopFloatLongAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, float v, long outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, FloatBlock values, LongBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getFloat(i), outputValues.getLong(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, float v, long outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, FloatBlock values, LongBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getFloat(i), outputValues.getLong(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final FloatLongBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new FloatLongBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, float value, long outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(float value, long outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntAggregator.java
index aa8c5e8e1bf3f..7fc6053bf7681 100644
--- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntAggregator.java
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntAggregator.java
@@ -18,7 +18,6 @@
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.IntBlock;
import org.elasticsearch.compute.data.IntVector;
-import org.elasticsearch.compute.data.IntBlock;
import org.elasticsearch.compute.data.sort.IntBucketedSort;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.core.Releasables;
@@ -118,7 +117,9 @@ public void add(int value) {
@Override
public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
- blocks[offset] = toBlock(driverContext.blockFactory());
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
}
Block toBlock(BlockFactory blockFactory) {
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntDoubleAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntDoubleAggregator.java
new file mode 100644
index 0000000000000..10090a179b5bb
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntDoubleAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.IntDoubleBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N field values for int.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "INT_BLOCK"), @IntermediateState(name = "output", type = "DOUBLE_BLOCK") })
+@GroupingAggregator
+class TopIntDoubleAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, int v, double outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, IntBlock values, DoubleBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getInt(i), outputValues.getDouble(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, int v, double outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, IntBlock values, DoubleBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getInt(i), outputValues.getDouble(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final IntDoubleBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new IntDoubleBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, int value, double outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(int value, double outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntFloatAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntFloatAggregator.java
new file mode 100644
index 0000000000000..e007e66e8b526
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntFloatAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.FloatBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.IntFloatBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N field values for int.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "INT_BLOCK"), @IntermediateState(name = "output", type = "FLOAT_BLOCK") })
+@GroupingAggregator
+class TopIntFloatAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, int v, float outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, IntBlock values, FloatBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getInt(i), outputValues.getFloat(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, int v, float outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, IntBlock values, FloatBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getInt(i), outputValues.getFloat(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final IntFloatBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new IntFloatBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, int value, float outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(int value, float outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntIntAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntIntAggregator.java
new file mode 100644
index 0000000000000..72b2065e1fe77
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntIntAggregator.java
@@ -0,0 +1,139 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.IntIntBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N field values for int.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "INT_BLOCK"), @IntermediateState(name = "output", type = "INT_BLOCK") })
+@GroupingAggregator
+class TopIntIntAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, int v, int outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, IntBlock values, IntBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getInt(i), outputValues.getInt(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, int v, int outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, IntBlock values, IntBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getInt(i), outputValues.getInt(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final IntIntBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new IntIntBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, int value, int outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(int value, int outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntLongAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntLongAggregator.java
new file mode 100644
index 0000000000000..a3eba01bd34cf
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIntLongAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.IntLongBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N field values for int.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "INT_BLOCK"), @IntermediateState(name = "output", type = "LONG_BLOCK") })
+@GroupingAggregator
+class TopIntLongAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, int v, long outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, IntBlock values, LongBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getInt(i), outputValues.getLong(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, int v, long outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, IntBlock values, LongBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getInt(i), outputValues.getLong(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final IntLongBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new IntLongBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, int value, long outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(int value, long outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIpAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIpAggregator.java
index 831f573cb3cd0..118aa86b43dbe 100644
--- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIpAggregator.java
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopIpAggregator.java
@@ -18,7 +18,6 @@
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.IntVector;
-import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.sort.IpBucketedSort;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.core.Releasables;
@@ -120,7 +119,9 @@ public void add(BytesRef value) {
@Override
public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
- blocks[offset] = toBlock(driverContext.blockFactory());
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
}
Block toBlock(BlockFactory blockFactory) {
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongAggregator.java
index a31ee1afd8a07..a066ac3d779e0 100644
--- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongAggregator.java
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongAggregator.java
@@ -18,7 +18,6 @@
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.LongBlock;
import org.elasticsearch.compute.data.IntVector;
-import org.elasticsearch.compute.data.LongBlock;
import org.elasticsearch.compute.data.sort.LongBucketedSort;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.core.Releasables;
@@ -118,7 +117,9 @@ public void add(long value) {
@Override
public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
- blocks[offset] = toBlock(driverContext.blockFactory());
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
}
Block toBlock(BlockFactory blockFactory) {
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongDoubleAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongDoubleAggregator.java
new file mode 100644
index 0000000000000..31c4f9096ca0e
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongDoubleAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.LongDoubleBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N field values for long.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "LONG_BLOCK"), @IntermediateState(name = "output", type = "DOUBLE_BLOCK") })
+@GroupingAggregator
+class TopLongDoubleAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, long v, double outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, LongBlock values, DoubleBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getLong(i), outputValues.getDouble(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, long v, double outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, LongBlock values, DoubleBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getLong(i), outputValues.getDouble(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final LongDoubleBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new LongDoubleBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, long value, double outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(long value, double outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongFloatAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongFloatAggregator.java
new file mode 100644
index 0000000000000..66d699cfd3d32
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongFloatAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.FloatBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.LongFloatBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N field values for long.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "LONG_BLOCK"), @IntermediateState(name = "output", type = "FLOAT_BLOCK") })
+@GroupingAggregator
+class TopLongFloatAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, long v, float outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, LongBlock values, FloatBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getLong(i), outputValues.getFloat(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, long v, float outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, LongBlock values, FloatBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getLong(i), outputValues.getFloat(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final LongFloatBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new LongFloatBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, long value, float outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(long value, float outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongIntAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongIntAggregator.java
new file mode 100644
index 0000000000000..d2f661348a146
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongIntAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.LongIntBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N field values for long.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "LONG_BLOCK"), @IntermediateState(name = "output", type = "INT_BLOCK") })
+@GroupingAggregator
+class TopLongIntAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, long v, int outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, LongBlock values, IntBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getLong(i), outputValues.getInt(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, long v, int outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, LongBlock values, IntBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getLong(i), outputValues.getInt(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final LongIntBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new LongIntBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, long value, int outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(long value, int outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongLongAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongLongAggregator.java
new file mode 100644
index 0000000000000..3db32f71905c0
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopLongLongAggregator.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.aggregation;
+
+// begin generated imports
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.compute.ann.Aggregator;
+import org.elasticsearch.compute.ann.GroupingAggregator;
+import org.elasticsearch.compute.ann.IntermediateState;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.sort.LongLongBucketedSort;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.sort.SortOrder;
+// end generated imports
+
+/**
+ * Aggregates the top N field values for long.
+ *
+ * This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ *
+ */
+@Aggregator({ @IntermediateState(name = "top", type = "LONG_BLOCK"), @IntermediateState(name = "output", type = "LONG_BLOCK") })
+@GroupingAggregator
+class TopLongLongAggregator {
+ public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) {
+ return new SingleState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(SingleState state, long v, long outputValue) {
+ state.add(v, outputValue);
+ }
+
+ public static void combineIntermediate(SingleState state, LongBlock values, LongBlock outputValues) {
+ int start = values.getFirstValueIndex(0);
+ int end = start + values.getValueCount(0);
+ for (int i = start; i < end; i++) {
+ combine(state, values.getLong(i), outputValues.getLong(i));
+ }
+ }
+
+ public static Block evaluateFinal(SingleState state, DriverContext driverContext) {
+ return state.toBlock(driverContext.blockFactory());
+ }
+
+ public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) {
+ return new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public static void combine(GroupingState state, int groupId, long v, long outputValue) {
+ state.add(groupId, v, outputValue);
+ }
+
+ public static void combineIntermediate(GroupingState state, int groupId, LongBlock values, LongBlock outputValues, int valuesPosition) {
+ int start = values.getFirstValueIndex(valuesPosition);
+ int end = start + values.getValueCount(valuesPosition);
+ for (int i = start; i < end; i++) {
+ combine(state, groupId, values.getLong(i), outputValues.getLong(i));
+ }
+ }
+
+ public static Block evaluateFinal(GroupingState state, IntVector selected, GroupingAggregatorEvaluationContext ctx) {
+ return state.toBlock(ctx.blockFactory(), selected);
+ }
+
+ public static class GroupingState implements GroupingAggregatorState {
+ private final LongLongBucketedSort sort;
+
+ private GroupingState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.sort = new LongLongBucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit);
+ }
+
+ public void add(int groupId, long value, long outputValue) {
+ sort.collect(value, outputValue, groupId);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) {
+ sort.toBlocks(driverContext.blockFactory(), blocks, offset, selected);
+ }
+
+ Block toBlock(BlockFactory blockFactory, IntVector selected) {
+ Block[] blocks = new Block[2];
+ sort.toBlocks(blockFactory, blocks, 0, selected);
+ Releasables.close(blocks[0]);
+ return blocks[1];
+ }
+
+ @Override
+ public void enableGroupIdTracking(SeenGroupIds seen) {
+ // we figure out seen values from nulls on the values block
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(sort);
+ }
+ }
+
+ public static class SingleState implements AggregatorState {
+ private final GroupingState internalState;
+
+ private SingleState(BigArrays bigArrays, int limit, boolean ascending) {
+ this.internalState = new GroupingState(bigArrays, limit, ascending);
+ }
+
+ public void add(long value, long outputValue) {
+ internalState.add(0, value, outputValue);
+ }
+
+ @Override
+ public void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) {
+ try (var intValues = driverContext.blockFactory().newConstantIntVector(0, 1)) {
+ internalState.toIntermediate(blocks, offset, intValues, driverContext);
+ }
+ }
+
+ Block toBlock(BlockFactory blockFactory) {
+ try (var intValues = blockFactory.newConstantIntVector(0, 1)) {
+ return internalState.toBlock(blockFactory, intValues);
+ }
+ }
+
+ @Override
+ public void close() {
+ Releasables.closeExpectNoException(internalState);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleBucketedSort.java
index c8c6701e68e4a..995cf68cd6a50 100644
--- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleBucketedSort.java
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleBucketedSort.java
@@ -23,7 +23,7 @@
import java.util.stream.IntStream;
/**
- * Aggregates the top N double values per bucket.
+ * Aggregates the top N {@code double} values per bucket.
* See {@link BucketedSort} for more information.
* This class is generated. Edit @{code X-BucketedSort.java.st} instead of this file.
*/
@@ -162,12 +162,7 @@ public void merge(int groupId, DoubleBucketedSort other, int otherGroupId) {
*/
public Block toBlock(BlockFactory blockFactory, IntVector selected) {
// Check if the selected groups are all empty, to avoid allocating extra memory
- if (IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
- var bounds = this.getBucketValuesIndexes(bucket);
- var size = bounds.v2() - bounds.v1();
-
- return size > 0;
- })) {
+ if (allSelectedGroupsAreEmpty(selected)) {
return blockFactory.newConstantNullBlock(selected.getPositionCount());
}
@@ -185,7 +180,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
}
if (size == 1) {
- builder.appendDouble(values.get(bounds.v1()));
+ builder.appendDouble(values.get(rootIndex));
continue;
}
@@ -197,7 +192,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
builder.beginPositionEntry();
for (int i = 0; i < size; i++) {
- builder.appendDouble(values.get(bounds.v1() + i));
+ builder.appendDouble(values.get(rootIndex + i));
}
builder.endPositionEntry();
}
@@ -205,6 +200,17 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
}
}
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
/**
* Is this bucket a min heap {@code true} or in gathering mode {@code false}?
*/
@@ -234,7 +240,8 @@ private void setNextGatherOffset(long rootIndex, int offset) {
* {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
*/
private boolean betterThan(double lhs, double rhs) {
- return getOrder().reverseMul() * Double.compare(lhs, rhs) < 0;
+ int res = Double.compare(lhs, rhs);
+ return getOrder().reverseMul() * res < 0;
}
/**
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleDoubleBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleDoubleBucketedSort.java
new file mode 100644
index 0000000000000..6438cb1c99a62
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleDoubleBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.DoubleArray;
+import org.elasticsearch.common.util.DoubleArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code double} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit @{code X-BucketedSort.java.st} instead of this file.
+ */
+public class DoubleDoubleBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ * <p>
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ * </p>
+ * <ul>
+ * <li>
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ * <p>
+ * This allows us to insert elements in O(1) time.
+ * </p>
+ * <p>
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ * </p>
+ * </li>
+ * <li>
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ * <p>
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ * </p>
+ * </li>
+ * </ul>
+ */
+ private DoubleArray values;
+ private DoubleArray extraValues;
+
+ public DoubleDoubleBucketedSort(BigArrays bigArrays, SortOrder order, int bucketSize) {
+ this.bigArrays = bigArrays;
+ this.order = order;
+ this.bucketSize = bucketSize;
+ heapMode = new BitArray(0, bigArrays);
+ boolean success = false;
+ try {
+ values = bigArrays.newDoubleArray(0, false);
+ extraValues = bigArrays.newDoubleArray(0, false);
+ success = true;
+ } finally {
+ if (success == false) {
+ close();
+ }
+ }
+ }
+
+ /**
+ * Collects a {@code value} into a {@code bucket}.
+ * <p>
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ * </p>
+ */
+ public void collect(double value, double extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple<Long, Long> getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, DoubleDoubleBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newDoubleBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newDoubleBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendDouble(values.get(rootIndex));
+ extraBuilder.appendDouble(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendDouble(values.get(rootIndex + i));
+ extraBuilder.appendDouble(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(double lhs, double rhs, double lhsExtra, double rhsExtra) {
+ int res = Double.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Double.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.DOUBLE_PAGE_SIZE, Double.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ * D.E. Knuth, ”The Art of Computer Programming, Vol. 3, Sorting and Searching”
+ *
+ * @param rootIndex the index of the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleFloatBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleFloatBucketedSort.java
new file mode 100644
index 0000000000000..f4bd6d9fdbf92
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleFloatBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.FloatArray;
+import org.elasticsearch.common.util.DoubleArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code double} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class DoubleFloatBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(double value, float extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, DoubleFloatBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newDoubleBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newFloatBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendDouble(values.get(rootIndex));
+ extraBuilder.appendFloat(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendDouble(values.get(rootIndex + i));
+ extraBuilder.appendFloat(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(double lhs, double rhs, float lhsExtra, float rhsExtra) {
+ int res = Double.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Float.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.DOUBLE_PAGE_SIZE, Double.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ * D.E. Knuth, ”The Art of Computer Programming, Vol. 3, Sorting and Searching”
+ *
+ * @param rootIndex the index of the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleIntBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleIntBucketedSort.java
new file mode 100644
index 0000000000000..c20fb8f9ce1a4
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleIntBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.IntArray;
+import org.elasticsearch.common.util.DoubleArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code double} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class DoubleIntBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(double value, int extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, DoubleIntBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newDoubleBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newIntBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendDouble(values.get(rootIndex));
+ extraBuilder.appendInt(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendDouble(values.get(rootIndex + i));
+ extraBuilder.appendInt(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(double lhs, double rhs, int lhsExtra, int rhsExtra) {
+ int res = Double.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Integer.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.DOUBLE_PAGE_SIZE, Double.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ * D.E. Knuth, ”The Art of Computer Programming, Vol. 3, Sorting and Searching”
+ *
+ * @param rootIndex the index of the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleLongBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleLongBucketedSort.java
new file mode 100644
index 0000000000000..6f84c762c3258
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/DoubleLongBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.common.util.DoubleArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code double} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class DoubleLongBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(double value, long extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, DoubleLongBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newDoubleBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newLongBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendDouble(values.get(rootIndex));
+ extraBuilder.appendLong(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendDouble(values.get(rootIndex + i));
+ extraBuilder.appendLong(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(double lhs, double rhs, long lhsExtra, long rhsExtra) {
+ int res = Double.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Long.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.DOUBLE_PAGE_SIZE, Double.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ *      D.E. Knuth, "The Art of Computer Programming", Vol. 3, Sorting and Searching
+ *
+ * @param rootIndex the index of the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatBucketedSort.java
index 4afaa818855e4..5dee9e1555526 100644
--- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatBucketedSort.java
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatBucketedSort.java
@@ -23,7 +23,7 @@
import java.util.stream.IntStream;
/**
- * Aggregates the top N float values per bucket.
+ * Aggregates the top N {@code float} values per bucket.
* See {@link BucketedSort} for more information.
* This class is generated. Edit @{code X-BucketedSort.java.st} instead of this file.
*/
@@ -162,12 +162,7 @@ public void merge(int groupId, FloatBucketedSort other, int otherGroupId) {
*/
public Block toBlock(BlockFactory blockFactory, IntVector selected) {
// Check if the selected groups are all empty, to avoid allocating extra memory
- if (IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
- var bounds = this.getBucketValuesIndexes(bucket);
- var size = bounds.v2() - bounds.v1();
-
- return size > 0;
- })) {
+ if (allSelectedGroupsAreEmpty(selected)) {
return blockFactory.newConstantNullBlock(selected.getPositionCount());
}
@@ -185,7 +180,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
}
if (size == 1) {
- builder.appendFloat(values.get(bounds.v1()));
+ builder.appendFloat(values.get(rootIndex));
continue;
}
@@ -197,7 +192,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
builder.beginPositionEntry();
for (int i = 0; i < size; i++) {
- builder.appendFloat(values.get(bounds.v1() + i));
+ builder.appendFloat(values.get(rootIndex + i));
}
builder.endPositionEntry();
}
@@ -205,6 +200,17 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
}
}
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
/**
* Is this bucket a min heap {@code true} or in gathering mode {@code false}?
*/
@@ -234,7 +240,8 @@ private void setNextGatherOffset(long rootIndex, int offset) {
* {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
*/
private boolean betterThan(float lhs, float rhs) {
- return getOrder().reverseMul() * Float.compare(lhs, rhs) < 0;
+ int res = Float.compare(lhs, rhs);
+ return getOrder().reverseMul() * res < 0;
}
/**
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatDoubleBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatDoubleBucketedSort.java
new file mode 100644
index 0000000000000..9e66583b2373e
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatDoubleBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.DoubleArray;
+import org.elasticsearch.common.util.FloatArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code float} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class FloatDoubleBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(float value, double extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, FloatDoubleBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newFloatBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newDoubleBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendFloat(values.get(rootIndex));
+ extraBuilder.appendDouble(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendFloat(values.get(rootIndex + i));
+ extraBuilder.appendDouble(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(float lhs, float rhs, double lhsExtra, double rhsExtra) {
+ int res = Float.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Double.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.FLOAT_PAGE_SIZE, Float.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ *      D.E. Knuth, "The Art of Computer Programming", Vol. 3, Sorting and Searching
+ *
+ * @param rootIndex the index of the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatFloatBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatFloatBucketedSort.java
new file mode 100644
index 0000000000000..c3ad1e47f1698
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatFloatBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.FloatArray;
+
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code float} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class FloatFloatBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(float value, float extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+    private Tuple<Long, Long> getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, FloatFloatBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newFloatBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newFloatBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendFloat(values.get(rootIndex));
+ extraBuilder.appendFloat(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendFloat(values.get(rootIndex + i));
+ extraBuilder.appendFloat(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(float lhs, float rhs, float lhsExtra, float rhsExtra) {
+ int res = Float.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Float.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.FLOAT_PAGE_SIZE, Float.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+        // Keep extraValues sized in lockstep with values (newSize is already bucket-aligned).
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ *      D.E. Knuth, "The Art of Computer Programming", Vol. 3, Sorting and Searching.
+ *
+ * @param rootIndex the index of the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatIntBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatIntBucketedSort.java
new file mode 100644
index 0000000000000..fefe511a88586
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatIntBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.IntArray;
+import org.elasticsearch.common.util.FloatArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code float} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class FloatIntBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(float value, int extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+    private Tuple<Long, Long> getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, FloatIntBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newFloatBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newIntBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendFloat(values.get(rootIndex));
+ extraBuilder.appendInt(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendFloat(values.get(rootIndex + i));
+ extraBuilder.appendInt(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(float lhs, float rhs, int lhsExtra, int rhsExtra) {
+ int res = Float.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Integer.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.FLOAT_PAGE_SIZE, Float.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+        // Keep extraValues sized in lockstep with values (newSize is already bucket-aligned).
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ *      D.E. Knuth, "The Art of Computer Programming", Vol. 3, Sorting and Searching.
+ *
+ * @param rootIndex the index of the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatLongBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatLongBucketedSort.java
new file mode 100644
index 0000000000000..32aa8e9fdb191
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/FloatLongBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.common.util.FloatArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code float} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class FloatLongBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(float value, long extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+    private Tuple<Long, Long> getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, FloatLongBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newFloatBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newLongBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendFloat(values.get(rootIndex));
+ extraBuilder.appendLong(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendFloat(values.get(rootIndex + i));
+ extraBuilder.appendLong(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(float lhs, float rhs, long lhsExtra, long rhsExtra) {
+ int res = Float.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Long.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.FLOAT_PAGE_SIZE, Float.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ * D.E. Knuth, "The Art of Computer Programming, Vol. 3, Sorting and Searching"
+ *
+ * @param rootIndex the index the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntBucketedSort.java
index 5ba1a3f7138a3..7dcec4461f0a5 100644
--- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntBucketedSort.java
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntBucketedSort.java
@@ -23,7 +23,7 @@
import java.util.stream.IntStream;
/**
- * Aggregates the top N int values per bucket.
+ * Aggregates the top N {@code int} values per bucket.
* See {@link BucketedSort} for more information.
* This class is generated. Edit @{code X-BucketedSort.java.st} instead of this file.
*/
@@ -162,12 +162,7 @@ public void merge(int groupId, IntBucketedSort other, int otherGroupId) {
*/
public Block toBlock(BlockFactory blockFactory, IntVector selected) {
// Check if the selected groups are all empty, to avoid allocating extra memory
- if (IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
- var bounds = this.getBucketValuesIndexes(bucket);
- var size = bounds.v2() - bounds.v1();
-
- return size > 0;
- })) {
+ if (allSelectedGroupsAreEmpty(selected)) {
return blockFactory.newConstantNullBlock(selected.getPositionCount());
}
@@ -185,7 +180,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
}
if (size == 1) {
- builder.appendInt(values.get(bounds.v1()));
+ builder.appendInt(values.get(rootIndex));
continue;
}
@@ -197,7 +192,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
builder.beginPositionEntry();
for (int i = 0; i < size; i++) {
- builder.appendInt(values.get(bounds.v1() + i));
+ builder.appendInt(values.get(rootIndex + i));
}
builder.endPositionEntry();
}
@@ -205,6 +200,17 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
}
}
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
/**
* Is this bucket a min heap {@code true} or in gathering mode {@code false}?
*/
@@ -234,7 +240,8 @@ private void setNextGatherOffset(long rootIndex, int offset) {
* {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
*/
private boolean betterThan(int lhs, int rhs) {
- return getOrder().reverseMul() * Integer.compare(lhs, rhs) < 0;
+ int res = Integer.compare(lhs, rhs);
+ return getOrder().reverseMul() * res < 0;
}
/**
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntDoubleBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntDoubleBucketedSort.java
new file mode 100644
index 0000000000000..9692a3490add6
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntDoubleBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.DoubleArray;
+import org.elasticsearch.common.util.IntArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code int} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class IntDoubleBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(int value, double extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, IntDoubleBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newIntBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newDoubleBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendInt(values.get(rootIndex));
+ extraBuilder.appendDouble(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendInt(values.get(rootIndex + i));
+ extraBuilder.appendDouble(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(int lhs, int rhs, double lhsExtra, double rhsExtra) {
+ int res = Integer.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Double.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.INT_PAGE_SIZE, Integer.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ * D.E. Knuth, "The Art of Computer Programming, Vol. 3, Sorting and Searching"
+ *
+ * @param rootIndex the index the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntFloatBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntFloatBucketedSort.java
new file mode 100644
index 0000000000000..756f9894d887a
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntFloatBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.FloatArray;
+import org.elasticsearch.common.util.IntArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code int} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class IntFloatBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(int value, float extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, IntFloatBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newIntBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newFloatBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendInt(values.get(rootIndex));
+ extraBuilder.appendFloat(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendInt(values.get(rootIndex + i));
+ extraBuilder.appendFloat(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(int lhs, int rhs, float lhsExtra, float rhsExtra) {
+ int res = Integer.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Float.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.INT_PAGE_SIZE, Integer.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ * D.E. Knuth, "The Art of Computer Programming, Vol. 3, Sorting and Searching"
+ *
+ * @param rootIndex the index the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntIntBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntIntBucketedSort.java
new file mode 100644
index 0000000000000..13cb37e886ce6
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntIntBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.IntArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code int} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class IntIntBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(int value, int extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, IntIntBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newIntBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newIntBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendInt(values.get(rootIndex));
+ extraBuilder.appendInt(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendInt(values.get(rootIndex + i));
+ extraBuilder.appendInt(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(int lhs, int rhs, int lhsExtra, int rhsExtra) {
+ int res = Integer.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Integer.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.INT_PAGE_SIZE, Integer.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
D.E. Knuth, ”The Art of Computer Programming, Vol. 3, Sorting and Searching”
+ *
+ * @param rootIndex the index the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntLongBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntLongBucketedSort.java
new file mode 100644
index 0000000000000..dae223f67dc8d
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/IntLongBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.common.util.IntArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code int} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class IntLongBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(int value, long extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, IntLongBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newIntBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newLongBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendInt(values.get(rootIndex));
+ extraBuilder.appendLong(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendInt(values.get(rootIndex + i));
+ extraBuilder.appendLong(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(int lhs, int rhs, long lhsExtra, long rhsExtra) {
+ int res = Integer.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Long.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.INT_PAGE_SIZE, Integer.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
D.E. Knuth, ”The Art of Computer Programming, Vol. 3, Sorting and Searching”
+ *
+ * @param rootIndex the index the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongBucketedSort.java
index ac472cc411668..e10ce05017d23 100644
--- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongBucketedSort.java
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongBucketedSort.java
@@ -23,7 +23,7 @@
import java.util.stream.IntStream;
/**
- * Aggregates the top N long values per bucket.
+ * Aggregates the top N {@code long} values per bucket.
* See {@link BucketedSort} for more information.
* This class is generated. Edit @{code X-BucketedSort.java.st} instead of this file.
*/
@@ -162,12 +162,7 @@ public void merge(int groupId, LongBucketedSort other, int otherGroupId) {
*/
public Block toBlock(BlockFactory blockFactory, IntVector selected) {
// Check if the selected groups are all empty, to avoid allocating extra memory
- if (IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
- var bounds = this.getBucketValuesIndexes(bucket);
- var size = bounds.v2() - bounds.v1();
-
- return size > 0;
- })) {
+ if (allSelectedGroupsAreEmpty(selected)) {
return blockFactory.newConstantNullBlock(selected.getPositionCount());
}
@@ -185,7 +180,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
}
if (size == 1) {
- builder.appendLong(values.get(bounds.v1()));
+ builder.appendLong(values.get(rootIndex));
continue;
}
@@ -197,7 +192,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
builder.beginPositionEntry();
for (int i = 0; i < size; i++) {
- builder.appendLong(values.get(bounds.v1() + i));
+ builder.appendLong(values.get(rootIndex + i));
}
builder.endPositionEntry();
}
@@ -205,6 +200,17 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) {
}
}
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
/**
* Is this bucket a min heap {@code true} or in gathering mode {@code false}?
*/
@@ -234,7 +240,8 @@ private void setNextGatherOffset(long rootIndex, int offset) {
* {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
*/
private boolean betterThan(long lhs, long rhs) {
- return getOrder().reverseMul() * Long.compare(lhs, rhs) < 0;
+ int res = Long.compare(lhs, rhs);
+ return getOrder().reverseMul() * res < 0;
}
/**
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongDoubleBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongDoubleBucketedSort.java
new file mode 100644
index 0000000000000..dd7bf77eaf6f6
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongDoubleBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.DoubleArray;
+import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code long} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class LongDoubleBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(long value, double extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple<Long, Long> getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, LongDoubleBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newLongBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newDoubleBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendLong(values.get(rootIndex));
+ extraBuilder.appendDouble(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendLong(values.get(rootIndex + i));
+ extraBuilder.appendDouble(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(long lhs, long rhs, double lhsExtra, double rhsExtra) {
+ int res = Long.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Double.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.LONG_PAGE_SIZE, Long.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ * @see "D.E. Knuth, The Art of Computer Programming, Vol. 3, Sorting and Searching"
+ *
+ * @param rootIndex the index the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongFloatBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongFloatBucketedSort.java
new file mode 100644
index 0000000000000..db4d25aecfa74
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongFloatBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.FloatArray;
+import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code long} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class LongFloatBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(long value, float extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple<Long, Long> getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, LongFloatBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newLongBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newFloatBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendLong(values.get(rootIndex));
+ extraBuilder.appendFloat(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendLong(values.get(rootIndex + i));
+ extraBuilder.appendFloat(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(long lhs, long rhs, float lhsExtra, float rhsExtra) {
+ int res = Long.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Float.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.LONG_PAGE_SIZE, Long.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * wikipedia
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ * @see "D.E. Knuth, The Art of Computer Programming, Vol. 3, Sorting and Searching"
+ *
+ * @param rootIndex the index the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongIntBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongIntBucketedSort.java
new file mode 100644
index 0000000000000..8e22b27791a2c
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongIntBucketedSort.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.IntArray;
+import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code long} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit {@code X-BucketedSort.java.st} instead of this file.
+ */
+public class LongIntBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(long value, int extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple<Long, Long> getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, LongIntBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newLongBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newIntBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendLong(values.get(rootIndex));
+ extraBuilder.appendInt(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendLong(values.get(rootIndex + i));
+ extraBuilder.appendInt(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(long lhs, long rhs, int lhsExtra, int rhsExtra) {
+ int res = Long.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Integer.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.LONG_PAGE_SIZE, Long.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * <a href="https://en.wikipedia.org/wiki/Binary_heap#Building_a_heap">wikipedia</a>
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *
+ * D.E. Knuth, ”The Art of Computer Programming, Vol. 3, Sorting and Searching”
+ *
+ * @param rootIndex the index the start of the bucket
+ */
+ private void heapify(long rootIndex, int heapSize) {
+ int maxParent = heapSize / 2 - 1;
+ for (int parent = maxParent; parent >= 0; parent--) {
+ downHeap(rootIndex, parent, heapSize);
+ }
+ }
+
+ /**
+ * Sorts all the values in the heap using heap sort algorithm.
+ * This runs in {@code O(n log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void heapSort(long rootIndex, int heapSize) {
+ while (heapSize > 0) {
+ swap(rootIndex, rootIndex + heapSize - 1);
+ heapSize--;
+ downHeap(rootIndex, 0, heapSize);
+ }
+ }
+
+ /**
+ * Correct the heap invariant of a parent and its children. This
+ * runs in {@code O(log n)} time.
+ * @param rootIndex index of the start of the bucket
+ * @param parent Index within the bucket of the parent to check.
+ * For example, 0 is the "root".
+ * @param heapSize Number of values that belong to the heap.
+ * Can be less than bucketSize.
+ * In such a case, the remaining values in range
+ * (rootIndex + heapSize, rootIndex + bucketSize)
+ * are *not* considered part of the heap.
+ */
+ private void downHeap(long rootIndex, int parent, int heapSize) {
+ while (true) {
+ long parentIndex = rootIndex + parent;
+ int worst = parent;
+ long worstIndex = parentIndex;
+ int leftChild = parent * 2 + 1;
+ long leftIndex = rootIndex + leftChild;
+ if (leftChild < heapSize) {
+ if (betterThan(values.get(worstIndex), values.get(leftIndex), extraValues.get(worstIndex), extraValues.get(leftIndex))) {
+ worst = leftChild;
+ worstIndex = leftIndex;
+ }
+ int rightChild = leftChild + 1;
+ long rightIndex = rootIndex + rightChild;
+ if (rightChild < heapSize
+ && betterThan(
+ values.get(worstIndex),
+ values.get(rightIndex),
+ extraValues.get(worstIndex),
+ extraValues.get(rightIndex)
+ )) {
+ worst = rightChild;
+ worstIndex = rightIndex;
+ }
+ }
+ if (worst == parent) {
+ break;
+ }
+ swap(worstIndex, parentIndex);
+ parent = worst;
+ }
+ }
+
+ @Override
+ public final void close() {
+ Releasables.close(values, extraValues, heapMode);
+ }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongLongBucketedSort.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongLongBucketedSort.java
new file mode 100644
index 0000000000000..49a824e0c88b0
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/sort/LongLongBucketedSort.java
@@ -0,0 +1,405 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.data.sort;
+
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BitArray;
+import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.common.util.PageCacheRecycler;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.core.Releasable;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.Tuple;
+import org.elasticsearch.search.sort.BucketedSort;
+import org.elasticsearch.search.sort.SortOrder;
+
+import java.util.stream.IntStream;
+
+/**
+ * Aggregates the top N {@code long} values per bucket.
+ * See {@link BucketedSort} for more information.
+ * This class is generated. Edit @{code X-BucketedSort.java.st} instead of this file.
+ */
+public class LongLongBucketedSort implements Releasable {
+
+ private final BigArrays bigArrays;
+ private final SortOrder order;
+ private final int bucketSize;
+ /**
+ * {@code true} if the bucket is in heap mode, {@code false} if
+ * it is still gathering.
+ */
+ private final BitArray heapMode;
+ /**
+ * An array containing all the values on all buckets. The structure is as follows:
+ *
+ * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * Then, for each bucket, it can be in 2 states:
+ *
+ *
+ *
+ * Gather mode: All buckets start in gather mode, and remain here while they have less than bucketSize elements.
+ * In gather mode, the elements are stored in the array from the highest index to the lowest index.
+ * The lowest index contains the offset to the next slot to be filled.
+ *
+ * This allows us to insert elements in O(1) time.
+ *
+ *
+ * When the bucketSize-th element is collected, the bucket transitions to heap mode, by heapifying its contents.
+ *
+ *
+ *
+ * Heap mode: The bucket slots are organized as a min heap structure.
+ *
+ * The root of the heap is the minimum value in the bucket,
+ * which allows us to quickly discard new values that are not in the top N.
+ *
+ * It may or may not be inserted in the heap, depending on if it is better than the current root.
+ *
+ */
+ public void collect(long value, long extraValue, int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (inHeapMode(bucket)) {
+ if (betterThan(value, values.get(rootIndex), extraValue, extraValues.get(rootIndex))) {
+ values.set(rootIndex, value);
+ extraValues.set(rootIndex, extraValue);
+ downHeap(rootIndex, 0, bucketSize);
+ }
+ return;
+ }
+ // Gathering mode
+ long requiredSize = rootIndex + bucketSize;
+ if (values.size() < requiredSize) {
+ grow(bucket);
+ }
+ int next = getNextGatherOffset(rootIndex);
+ assert 0 <= next && next < bucketSize
+ : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]";
+ long index = next + rootIndex;
+ values.set(index, value);
+ extraValues.set(index, extraValue);
+ if (next == 0) {
+ heapMode.set(bucket);
+ heapify(rootIndex, bucketSize);
+ } else {
+ setNextGatherOffset(rootIndex, next - 1);
+ }
+ }
+
+ /**
+ * The order of the sort.
+ */
+ public SortOrder getOrder() {
+ return order;
+ }
+
+ /**
+ * The number of values to store per bucket.
+ */
+ public int getBucketSize() {
+ return bucketSize;
+ }
+
+ /**
+ * Get the first and last indexes (inclusive, exclusive) of the values for a bucket.
+ * Returns [0, 0] if the bucket has never been collected.
+ */
+ private Tuple getBucketValuesIndexes(int bucket) {
+ long rootIndex = (long) bucket * bucketSize;
+ if (rootIndex >= values.size()) {
+ // We've never seen this bucket.
+ return Tuple.tuple(0L, 0L);
+ }
+ long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1);
+ long end = rootIndex + bucketSize;
+ return Tuple.tuple(start, end);
+ }
+
+ /**
+ * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}.
+ */
+ public void merge(int groupId, LongLongBucketedSort other, int otherGroupId) {
+ var otherBounds = other.getBucketValuesIndexes(otherGroupId);
+
+ // TODO: This can be improved for heapified buckets by making use of the heap structures
+ for (long i = otherBounds.v1(); i < otherBounds.v2(); i++) {
+ collect(other.values.get(i), other.extraValues.get(i), groupId);
+ }
+ }
+
+ /**
+ * Creates a block with the values from the {@code selected} groups.
+ */
+ public void toBlocks(BlockFactory blockFactory, Block[] blocks, int offset, IntVector selected) {
+ // Check if the selected groups are all empty, to avoid allocating extra memory
+ if (allSelectedGroupsAreEmpty(selected)) {
+ Block constantNullBlock = blockFactory.newConstantNullBlock(selected.getPositionCount());
+ constantNullBlock.incRef();
+ blocks[offset] = constantNullBlock;
+ blocks[offset + 1] = constantNullBlock;
+ return;
+ }
+
+ try (
+ var builder = blockFactory.newLongBlockBuilder(selected.getPositionCount());
+ var extraBuilder = blockFactory.newLongBlockBuilder(selected.getPositionCount())
+ ) {
+ for (int s = 0; s < selected.getPositionCount(); s++) {
+ int bucket = selected.getInt(s);
+
+ var bounds = getBucketValuesIndexes(bucket);
+ var rootIndex = bounds.v1();
+ var size = bounds.v2() - bounds.v1();
+
+ if (size == 0) {
+ builder.appendNull();
+ extraBuilder.appendNull();
+ continue;
+ }
+
+ if (size == 1) {
+ builder.appendLong(values.get(rootIndex));
+ extraBuilder.appendLong(extraValues.get(rootIndex));
+ continue;
+ }
+
+ // If we are in the gathering mode, we need to heapify before sorting.
+ if (inHeapMode(bucket) == false) {
+ heapify(rootIndex, (int) size);
+ }
+ heapSort(rootIndex, (int) size);
+
+ builder.beginPositionEntry();
+ extraBuilder.beginPositionEntry();
+ for (int i = 0; i < size; i++) {
+ builder.appendLong(values.get(rootIndex + i));
+ extraBuilder.appendLong(extraValues.get(rootIndex + i));
+ }
+ builder.endPositionEntry();
+ extraBuilder.endPositionEntry();
+ }
+ blocks[offset] = builder.build();
+ blocks[offset + 1] = extraBuilder.build();
+ }
+ }
+
+ /**
+ * Checks if the selected groups are all empty.
+ */
+ private boolean allSelectedGroupsAreEmpty(IntVector selected) {
+ return IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> {
+ var bounds = this.getBucketValuesIndexes(bucket);
+ var size = bounds.v2() - bounds.v1();
+ return size > 0;
+ });
+ }
+
+ /**
+ * Is this bucket a min heap {@code true} or in gathering mode {@code false}?
+ */
+ private boolean inHeapMode(int bucket) {
+ return heapMode.get(bucket);
+ }
+
+ /**
+ * Get the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private int getNextGatherOffset(long rootIndex) {
+ return (int) values.get(rootIndex);
+ }
+
+ /**
+ * Set the next index that should be "gathered" for a bucket rooted
+ * at {@code rootIndex}.
+ */
+ private void setNextGatherOffset(long rootIndex, int offset) {
+ values.set(rootIndex, offset);
+ }
+
+ /**
+ * {@code true} if the entry at index {@code lhs} is "better" than
+ * the entry at {@code rhs}. "Better" in this means "lower" for
+ * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}.
+ */
+ private boolean betterThan(long lhs, long rhs, long lhsExtra, long rhsExtra) {
+ int res = Long.compare(lhs, rhs);
+ if (res != 0) {
+ return getOrder().reverseMul() * res < 0;
+ }
+ res = Long.compare(lhsExtra, rhsExtra);
+ return getOrder().reverseMul() * res < 0;
+ }
+
+ /**
+ * Swap the data at two indices.
+ */
+ private void swap(long lhs, long rhs) {
+ var tmp = values.get(lhs);
+ values.set(lhs, values.get(rhs));
+ values.set(rhs, tmp);
+ var tmpExtra = extraValues.get(lhs);
+ extraValues.set(lhs, extraValues.get(rhs));
+ extraValues.set(rhs, tmpExtra);
+ }
+
+ /**
+ * Allocate storage for more buckets and store the "next gather offset"
+ * for those new buckets. We always grow the storage by whole bucket's
+ * worth of slots at a time. We never allocate space for partial buckets.
+ */
+ private void grow(int bucket) {
+ long oldMax = values.size();
+ assert oldMax % bucketSize == 0;
+
+ long newSize = BigArrays.overSize(((long) bucket + 1) * bucketSize, PageCacheRecycler.LONG_PAGE_SIZE, Long.BYTES);
+ // Round up to the next full bucket.
+ newSize = (newSize + bucketSize - 1) / bucketSize;
+ values = bigArrays.resize(values, newSize * bucketSize);
+ // Round up to the next full bucket.
+ extraValues = bigArrays.resize(extraValues, newSize * bucketSize);
+ // Set the next gather offsets for all newly allocated buckets.
+ fillGatherOffsets(oldMax);
+ }
+
+ /**
+ * Maintain the "next gather offsets" for newly allocated buckets.
+ */
+ private void fillGatherOffsets(long startingAt) {
+ int nextOffset = getBucketSize() - 1;
+ for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += getBucketSize()) {
+ setNextGatherOffset(bucketRoot, nextOffset);
+ }
+ }
+
+ /**
+ * Heapify a bucket whose entries are in random order.
+ *
+ * This works by validating the heap property on each node, iterating
+ * "upwards", pushing any out of order parents "down". Check out the
+ * <a href="https://en.wikipedia.org/wiki/Binary_heap#Building_a_heap">wikipedia</a>
+ * entry on binary heaps for more about this.
+ *
+ *
+ * While this *looks* like it could easily be {@code O(n * log n)}, it is
+ * a fairly well studied algorithm attributed to Floyd. There's
+ * been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ * case.
+ *