ESQL: Add support for exponential_histogram in median (#138402)

JonasKunz · web-flow · commit d6c009056e27 · 2025-11-24T09:15:46.000+01:00
diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java
@@ -430,11 +430,11 @@ protected boolean supportsExponentialHistograms() {
         try {
             return RestEsqlTestCase.hasCapabilities(
                 client(),
-                List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V1.capabilityName())
+                List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V2.capabilityName())
             )
                 && RestEsqlTestCase.hasCapabilities(
                     remoteClusterClient(),
-                    List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V1.capabilityName())
+                    List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V2.capabilityName())
                 );
         } catch (IOException e) {
             throw new RuntimeException(e);
diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/EsqlSpecIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/EsqlSpecIT.java
@@ -58,7 +58,7 @@ protected boolean supportsSourceFieldMapping() {
     protected boolean supportsExponentialHistograms() {
         return RestEsqlTestCase.hasCapabilities(
             client(),
-            List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V1.capabilityName())
+            List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V2.capabilityName())
         );
     }
 
diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java
@@ -289,7 +289,7 @@ protected boolean supportsSourceFieldMapping() throws IOException {
     protected boolean supportsExponentialHistograms() {
         return RestEsqlTestCase.hasCapabilities(
             client(),
-            List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V1.capabilityName())
+            List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V2.capabilityName())
         );
     }
 
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/exponential_histogram.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/exponential_histogram.csv-spec
@@ -1,5 +1,5 @@
 loadFiltered
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE STARTS_WITH(instance, "dummy") | SORT instance | KEEP instance, responseTime
 ;
@@ -17,71 +17,89 @@ dummy-zero_threshold_only | "{""scale"":0,""zero"":{""threshold"":2.0E-5}}"
 
 
 allAggsGrouped
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample 
  | EVAL instance = CASE(STARTS_WITH(instance, "dummy"), "dummy-grouped", instance)
- | STATS min = MIN(responseTime), max = MAX(responseTime), p75 = PERCENTILE(responseTime,75), sum = SUM(responseTime), avg = AVG(responseTime) BY instance
- | EVAL p75 = ROUND(p75, 7) // rounding to avoid floating point precision issues
- | KEEP instance, min, max, p75, sum, avg
+ | STATS min = MIN(responseTime), max = MAX(responseTime), median = MEDIAN(responseTime), p75 = PERCENTILE(responseTime,75), sum = SUM(responseTime), avg = AVG(responseTime) BY instance
+ | EVAL median = ROUND(median, 7), p75 = ROUND(p75, 7) // rounding to avoid floating point precision issues
+ | KEEP instance, min, max, median, p75, sum, avg
  | SORT instance
 ;
 
-instance:keyword | min:double | max:double | p75:double | sum:double         | avg:double
-dummy-grouped    | -100.0     | 50.0       | 8.3457089  | -7550.0            | -15.0398406374502
-instance-0       | 2.4E-4     | 6.786232   | 0.2608237  | 1472.744209        | 0.1665811796176903
-instance-1       | 2.17E-4    | 3.190723   | 0.0016068  | 36.198484          | 0.011137995076923077
-instance-2       | 2.2E-4     | 2.744054   | 0.0016068  | 27.706021000000003 | 0.008197047633136096
+instance:keyword | min:double | max:double | median:double | p75:double | sum:double         | avg:double
+dummy-grouped    | -100.0     | 50.0       | 0.0           | 8.3457089  | -7550.0            | -15.0398406374502
+instance-0       | 2.4E-4     | 6.786232   | 0.0211404     | 0.2608237  | 1472.744209        | 0.1665811796176903
+instance-1       | 2.17E-4    | 3.190723   | 6.469E-4      | 0.0016068  | 36.198484          | 0.011137995076923077
+instance-2       | 2.2E-4     | 2.744054   | 6.469E-4      | 0.0016068  | 27.706021000000003 | 0.008197047633136096
 ;
 
 
+allAggsFiltered
+required_capability: exponential_histogram_pre_tech_preview_v2
+
+FROM exp_histo_sample 
+ | STATS min = MIN(responseTime) WHERE instance == "instance-0",
+         max = MAX(responseTime) WHERE instance == "instance-1",
+         median = MEDIAN(responseTime) WHERE instance == "instance-2",
+         p75 = PERCENTILE(responseTime,75) WHERE instance == "instance-0",
+         sum = SUM(responseTime) WHERE instance == "instance-1",
+         avg = AVG(responseTime) WHERE instance == "instance-2"
+ | EVAL median = ROUND(median, 7), p75 = ROUND(p75, 7) // rounding to avoid floating point precision issues
+ | KEEP min, max, median, p75, sum, avg
+;
+
+min:double | max:double | median:double | p75:double | sum:double | avg:double
+2.4E-4     | 3.190723   | 6.469E-4      | 0.2608237  | 36.198484  | 0.008197047633136096
+;
+
 
 allAggsInlineGrouped
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample 
- | INLINE STATS min = MIN(responseTime), max = MAX(responseTime), p75 = PERCENTILE(responseTime,75), sum = SUM(responseTime), avg = AVG(responseTime)  BY instance
- | EVAL p75 = ROUND(p75, 7) // rounding to avoid floating point precision issues
- | KEEP instance, min, max, p75, sum , avg
+ | INLINE STATS min = MIN(responseTime), max = MAX(responseTime), median = MEDIAN(responseTime), p75 = PERCENTILE(responseTime,75), sum = SUM(responseTime), avg = AVG(responseTime)  BY instance
+ | EVAL median = ROUND(median, 7), p75 = ROUND(p75, 7) // rounding to avoid floating point precision issues
+ | KEEP instance, min, max, median, p75, sum , avg
  | SORT instance
  | Limit 15
 ;
 
-instance:keyword          | min:double | max:double | p75:double  | sum:double  | avg:double
-dummy-empty               | null       | null       | null        | null        | null
-dummy-full                | -100.0     | 50.0       | 10.6666667  | -3775.0     | -25.0
-dummy-negative_only       | -50.0      | -1.0       | -12.8729318 | -1275.0     | -25.5
-dummy-no_zero_bucket      | -100.0     | 50.0       | 10.6666667  | -3775.0     | -25.166666666666668
-dummy-positive_only       | 1.0        | 50.0       | 34.7656715  | 1275.0      | 25.5
-dummy-zero_count_only     | 0.0        | 0.0        | 0.0         | 0.0         | 0.0
-dummy-zero_threshold_only | null       | null       | null        | null        | null
-instance-0                | 2.4E-4     | 6.786232   | 0.2608237   | 1472.744209 | 0.1665811796176903
-instance-0                | 2.4E-4     | 6.786232   | 0.2608237   | 1472.744209 | 0.1665811796176903
-instance-0                | 2.4E-4     | 6.786232   | 0.2608237   | 1472.744209 | 0.1665811796176903
-instance-0                | 2.4E-4     | 6.786232   | 0.2608237   | 1472.744209 | 0.1665811796176903
-instance-0                | 2.4E-4     | 6.786232   | 0.2608237   | 1472.744209 | 0.1665811796176903
-instance-0                | 2.4E-4     | 6.786232   | 0.2608237   | 1472.744209 | 0.1665811796176903
-instance-0                | 2.4E-4     | 6.786232   | 0.2608237   | 1472.744209 | 0.1665811796176903
-instance-0                | 2.4E-4     | 6.786232   | 0.2608237   | 1472.744209 | 0.1665811796176903
+instance:keyword          | min:double | max:double | median:double | p75:double  | sum:double  | avg:double
+dummy-empty               | null       | null       | null          | null        | null        | null
+dummy-full                | -100.0     | 50.0       | -21.3333333   | 10.6666667  | -3775.0     | -25.0
+dummy-negative_only       | -50.0      | -1.0       | -24.5830421   | -12.8729318 | -1275.0     | -25.5
+dummy-no_zero_bucket      | -100.0     | 50.0       | -21.3333333   | 10.6666667  | -3775.0     | -25.166666666666668
+dummy-positive_only       | 1.0        | 50.0       | 24.5830421    | 34.7656715  | 1275.0      | 25.5
+dummy-zero_count_only     | 0.0        | 0.0        | 0.0           | 0.0         | 0.0         | 0.0
+dummy-zero_threshold_only | null       | null       | null          | null        | null        | null
+instance-0                | 2.4E-4     | 6.786232   | 0.0211404     | 0.2608237   | 1472.744209 | 0.1665811796176903
+instance-0                | 2.4E-4     | 6.786232   | 0.0211404     | 0.2608237   | 1472.744209 | 0.1665811796176903
+instance-0                | 2.4E-4     | 6.786232   | 0.0211404     | 0.2608237   | 1472.744209 | 0.1665811796176903
+instance-0                | 2.4E-4     | 6.786232   | 0.0211404     | 0.2608237   | 1472.744209 | 0.1665811796176903
+instance-0                | 2.4E-4     | 6.786232   | 0.0211404     | 0.2608237   | 1472.744209 | 0.1665811796176903
+instance-0                | 2.4E-4     | 6.786232   | 0.0211404     | 0.2608237   | 1472.744209 | 0.1665811796176903
+instance-0                | 2.4E-4     | 6.786232   | 0.0211404     | 0.2608237   | 1472.744209 | 0.1665811796176903
+instance-0                | 2.4E-4     | 6.786232   | 0.0211404     | 0.2608237   | 1472.744209 | 0.1665811796176903
 ;
 
 
 
 allAggsOnEmptyHistogram
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE instance == "dummy-empty"
- | STATS min = MIN(responseTime), max = MAX(responseTime), p75 = PERCENTILE(responseTime,75)
- | KEEP min, max, p75
+ | STATS min = MIN(responseTime), max = MAX(responseTime), median = MEDIAN(responseTime), p75 = PERCENTILE(responseTime,75), sum = SUM(responseTime), avg = AVG(responseTime)
+ | KEEP min, max, median, p75, sum, avg
 ;
 
-min:double | max:double | p75:double
-NULL       | NULL       | NULL
+min:double | max:double | median:double | p75:double | sum:double | avg:double
+NULL       | NULL       | NULL          | NULL       | NULL       | NULL
 ;
 
 
 histoAsCaseValue
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample 
  | INLINE STATS p50 = PERCENTILE(responseTime, 50) BY instance, @timestamp
@@ -95,7 +113,7 @@ filteredCount:long
 ;
 
 ungroupedPercentiles
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy") 
  | STATS p0 = PERCENTILE(responseTime,0),  p50 = PERCENTILE(responseTime,50),  p99 = PERCENTILE(responseTime, 99),  p100 = PERCENTILE(responseTime,100)
@@ -110,7 +128,7 @@ p0:double | p50:double | p99:double | p100:double
 
 
 groupedPercentiles
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy") 
  | STATS p0 = PERCENTILE(responseTime,0),  p50 = PERCENTILE(responseTime,50),  p99 = PERCENTILE(responseTime, 99),  p100 = PERCENTILE(responseTime,100) BY instance
@@ -128,7 +146,7 @@ instance-2       | 2.2E-4    | 6.469E-4   | 0.0857672  | 2.7059714542564097
 
 
 percentileOnEmptyHistogram
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE instance == "dummy-empty"
  | STATS  p50 = PERCENTILE(responseTime,50)
@@ -142,7 +160,7 @@ NULL
 
 
 ungroupedMinMax
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy") 
  | STATS min = MIN(responseTime),  max = MAX(responseTime)
@@ -156,7 +174,7 @@ min:double | max:double
 
 
 groupedMinMax
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy") 
  | STATS min = MIN(responseTime),  max = MAX(responseTime) BY instance
@@ -173,7 +191,7 @@ instance-2       | 2.2E-4     | 2.744054
 
 
 minMaxOnEmptyHistogram
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE instance == "dummy-empty"
  | STATS  min = MIN(responseTime), max = MAX(responseTime)
@@ -186,7 +204,7 @@ NULL       | NULL
 
 
 ungroupedAvg
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy") 
  | STATS avg = AVG(responseTime)
@@ -199,7 +217,7 @@ avg:double
 
 
 groupedAvg
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy") 
  | STATS avg = AVG(responseTime) BY instance
@@ -215,7 +233,7 @@ instance-2       | 0.008197047633136096
 
 
 avgOnEmptyHistogram
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE instance == "dummy-empty"
  | STATS  avg = AVG(responseTime)
@@ -228,7 +246,7 @@ NULL
 
 
 ungroupedSum
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy") 
  | STATS sum = SUM(responseTime)
@@ -241,7 +259,7 @@ sum:double
 
 
 groupedSum
-required_capability: exponential_histogram_pre_tech_preview_v1
+required_capability: exponential_histogram_pre_tech_preview_v2
 
 FROM exp_histo_sample | WHERE NOT STARTS_WITH(instance, "dummy") 
  | STATS sum = SUM(responseTime) BY instance
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -1558,7 +1558,7 @@ public enum Cap {
          * When implementing changes on this type, we'll simply increment the version suffix at the end to prevent bwc tests from running.
          * As soon as we move into tech preview, we'll replace this capability with a "EXPONENTIAL_HISTOGRAM_TECH_PREVIEW" one.
          */
-        EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V1(EXPONENTIAL_HISTOGRAM_FEATURE_FLAG),
+        EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V2(EXPONENTIAL_HISTOGRAM_FEATURE_FLAG),
 
         /**
          * Create new block when filtering OrdinalBytesRefBlock
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Median.java
@@ -59,7 +59,7 @@ public Median(
         Source source,
         @Param(
             name = "number",
-            type = { "double", "integer", "long" },
+            type = { "double", "integer", "long", "exponential_histogram" },
             description = "Expression that outputs values to calculate the median of."
         ) Expression field
     ) {
@@ -74,9 +74,10 @@ public Median(Source source, Expression field, Expression filter, Expression win
     protected Expression.TypeResolution resolveType() {
         return isType(
             field(),
-            dt -> dt.isNumeric() && dt != DataType.UNSIGNED_LONG,
+            dt -> dt.isNumeric() && dt != DataType.UNSIGNED_LONG || dt == DataType.EXPONENTIAL_HISTOGRAM,
             sourceText(),
             DEFAULT,
+            "exponential_histogram",
             "numeric except unsigned_long or counter types"
         );
     }
@@ -115,7 +116,7 @@ public Expression surrogate() {
         var s = source();
         var field = field();
 
-        return field.foldable()
+        return field.foldable() && field.dataType() != DataType.EXPONENTIAL_HISTOGRAM
             ? new MvMedian(s, new ToDouble(s, field))
             : new Percentile(source(), field(), filter(), window(), new Literal(source(), (int) QuantileStates.MEDIAN, DataType.INTEGER));
     }
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
@@ -2050,7 +2050,7 @@ public void testUnsupportedTypesInStats() {
              found value [x] type [unsigned_long]
             line 2:20: argument of [count_distinct(x)] must be [any exact type except unsigned_long, _source, or counter types],\
              found value [x] type [unsigned_long]
-            line 2:47: argument of [median(x)] must be [numeric except unsigned_long or counter types],\
+            line 2:47: argument of [median(x)] must be [exponential_histogram or numeric except unsigned_long or counter types],\
              found value [x] type [unsigned_long]
             line 2:58: argument of [median_absolute_deviation(x)] must be [numeric except unsigned_long or counter types],\
              found value [x] type [unsigned_long]
@@ -2068,7 +2068,7 @@ public void testUnsupportedTypesInStats() {
             line 2:10: argument of [avg(x)] must be [aggregate_metric_double,\
              exponential_histogram or numeric except unsigned_long or counter types],\
              found value [x] type [version]
-            line 2:18: argument of [median(x)] must be [numeric except unsigned_long or counter types],\
+            line 2:18: argument of [median(x)] must be [exponential_histogram or numeric except unsigned_long or counter types],\
              found value [x] type [version]
             line 2:29: argument of [median_absolute_deviation(x)] must be [numeric except unsigned_long or counter types],\
              found value [x] type [version]
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MedianTests.java
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/PercentileTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/PercentileTests.java

Original file line number	Diff line number	Diff line change
`@@ -58,7 +58,7 @@ protected boolean supportsSourceFieldMapping() {`
`58`	`58`	`protected boolean supportsExponentialHistograms() {`
`59`	`59`	`return RestEsqlTestCase.hasCapabilities(`
`60`	`60`	`client(),`
`61`		`- List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V1.capabilityName())`
	`61`	`+ List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V2.capabilityName())`
`62`	`62`	`);`
`63`	`63`	`}`
`64`	`64`
Original file line number	Diff line number	Diff line change
`@@ -289,7 +289,7 @@ protected boolean supportsSourceFieldMapping() throws IOException {`
`289`	`289`	`protected boolean supportsExponentialHistograms() {`
`290`	`290`	`return RestEsqlTestCase.hasCapabilities(`
`291`	`291`	`client(),`
`292`		`- List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V1.capabilityName())`
	`292`	`+ List.of(EsqlCapabilities.Cap.EXPONENTIAL_HISTOGRAM_PRE_TECH_PREVIEW_V2.capabilityName())`
`293`	`293`	`);`
`294`	`294`	`}`
`295`	`295`