Fix count/avg computation in RandomizedTimeSeriesIT and re-enable the count and avg assertions

pabloem · pabloem · commit 0e2de204822e · 2025-08-18T08:35:03.000-07:00
diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/RandomizedTimeSeriesIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/RandomizedTimeSeriesIT.java
@@ -49,7 +49,7 @@
 @SuppressWarnings("unchecked")
 public class RandomizedTimeSeriesIT extends AbstractEsqlIntegTestCase {
 
-    private static final Long NUM_DOCS = 1000L;
+    private static final Long NUM_DOCS = 2000L;
     private static final String DATASTREAM_NAME = "tsit_ds";
     private List<XContentBuilder> documents = null;
     private TSDataGenerationHelper dataGenerationHelper;
@@ -205,7 +205,7 @@ public void testGroupBySubset() {
                 values(metrics.gauge_hdd.bytes.used),
                 max(max_over_time(metrics.gauge_hdd.bytes.used)),
                 min(min_over_time(metrics.gauge_hdd.bytes.used)),
-                count(count_over_time(metrics.gauge_hdd.bytes.used)),
+                sum(count_over_time(metrics.gauge_hdd.bytes.used)),
                 sum(sum_over_time(metrics.gauge_hdd.bytes.used)),
                 avg(avg_over_time(metrics.gauge_hdd.bytes.used))
                 BY tbucket=bucket(@timestamp, 1 minute), %s
@@ -217,20 +217,13 @@ public void testGroupBySubset() {
                 var rowKey = getRowKey(row, dimensions, 6);
                 var docValues = valuesInWindow(groups.get(rowKey), "gauge_hdd.bytes.used");
                 // Max of int is always int, so we can safely round the result.
-                var valuesAsInts = docValues.stream().map(Integer::valueOf).toList();
+                var valuesAsInts = docValues.stream().toList();
                 assertThat(valuesAsInts, containsInAnyOrder(docValues.toArray()));
                 assertThat(row.get(1), equalTo(Math.round(aggregateValuesInWindow(docValues, Agg.MAX))));
                 assertThat(row.get(2), equalTo(Math.round(aggregateValuesInWindow(docValues, Agg.MIN))));
-                // TODO: Enable assertions after we fix the computation.
-                // assertThat(row.get(3), equalTo((long) docValues.size()));
+                assertThat(row.get(3), equalTo((long) docValues.size()));
                 assertThat(row.get(4), equalTo(aggregateValuesInWindow(docValues, Agg.SUM).longValue()));
-                // We check the expected vs ES-calculated average. We divide them to normalize the error
-                // and allow for a 20% error margin.
-                // Double esAvg = (Double) row.get(5);
-                // Double expectedAvg = aggregateValuesInWindow(docValues, Agg.AVG);
-                // var ratio = esAvg / expectedAvg;
-                // assertThat(ratio, closeTo(1, 0.25));
-
+                assertThat(row.get(5), equalTo(aggregateValuesInWindow(docValues, Agg.SUM) / (double) docValues.size()));
             }
         }
     }
@@ -248,7 +241,7 @@ public void testGroupByNothing() {
                 values(metrics.gauge_hdd.bytes.used),
                 max(max_over_time(metrics.gauge_hdd.bytes.used)),
                 min(min_over_time(metrics.gauge_hdd.bytes.used)),
-                count(count_over_time(metrics.gauge_hdd.bytes.used)),
+                sum(count_over_time(metrics.gauge_hdd.bytes.used)),
                 sum(sum_over_time(metrics.gauge_hdd.bytes.used)),
                 avg(avg_over_time(metrics.gauge_hdd.bytes.used))
                 BY tbucket=bucket(@timestamp, 1 minute)
@@ -260,19 +253,13 @@ public void testGroupByNothing() {
                 var windowStart = windowStart(row.get(6), 60);
                 var docValues = valuesInWindow(groups.get(List.of(Long.toString(windowStart))), "gauge_hdd.bytes.used");
                 // Make sure that expected timestamps and values are present
-                var valuesAsInts = docValues.stream().map(Integer::valueOf).toList();
+                var valuesAsInts = docValues.stream().toList();
                 assertThat(valuesAsInts, containsInAnyOrder(docValues.toArray()));
                 assertThat(row.get(1), equalTo(Math.round(aggregateValuesInWindow(docValues, Agg.MAX))));
                 assertThat(row.get(2), equalTo(Math.round(aggregateValuesInWindow(docValues, Agg.MIN))));
-                // TODO: Enable assertions after we fix the computation.
-                // assertThat(row.get(3), equalTo((long) docValues.size()));
+                assertThat(row.get(3), equalTo((long) docValues.size()));
                 assertThat(row.get(4), equalTo(aggregateValuesInWindow(docValues, Agg.SUM).longValue()));
-                // We check the expected vs ES-calculated average. We divide them to normalize the error
-                // and allow for a 20% error margin.
-                // Double esAvg = (Double) row.get(5);
-                // Double expectedAvg = aggregateValuesInWindow(docValues, Agg.AVG);
-                // var ratio = esAvg / expectedAvg;
-                // assertThat(ratio, closeTo(1, 0.25));
+                assertThat(row.get(5), equalTo(aggregateValuesInWindow(docValues, Agg.SUM) / (double) docValues.size()));
             }
         }
     }
diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/TSDataGenerationHelper.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/TSDataGenerationHelper.java
@@ -51,8 +51,13 @@ private static Object randomDimensionValue(String dimensionName) {
         // Metrics coming into our system have a pre-set group of attributes.
         // Making a list-to-set-to-list to ensure uniqueness.
         this.numDocs = numDocs;
-        attributesForMetrics = List.copyOf(Set.copyOf(ESTestCase.randomList(1, 300, () -> ESTestCase.randomAlphaOfLengthBetween(2, 30))));
-        numTimeSeries = ESTestCase.randomIntBetween(10, (int) Math.sqrt(numDocs));
+        var maxAttributes = (int) Math.sqrt(numDocs);
+        attributesForMetrics = List.copyOf(
+            Set.copyOf(ESTestCase.randomList(1, maxAttributes, () -> ESTestCase.randomAlphaOfLengthBetween(2, 30)))
+        );
+        var maxTimeSeries = (int) Math.sqrt(numDocs);
+        var minTimeSeries = Math.max(1, maxTimeSeries / 4);
+        numTimeSeries = ESTestCase.randomIntBetween(minTimeSeries, maxTimeSeries);
         // allTimeSeries contains the list of dimension-values for each time series.
         List<List<Tuple<String, Object>>> allTimeSeries = IntStream.range(0, numTimeSeries).mapToObj(tsIdx -> {
             List<String> dimensionsInMetric = ESTestCase.randomNonEmptySubsetOf(attributesForMetrics);